Merge pull request #2549 from weinbe2/fea-snap-ui-aosoa
Kokkos SNAP optimizations --- Recursive polynomial evaluation optimizations, improved parallelism, various C++11-isms
This commit is contained in:
@ -115,8 +115,8 @@ The optional keyword *chunksize* is only applicable when using the
|
||||
the KOKKOS package and is ignored otherwise. This keyword controls
|
||||
the number of atoms in each pass used to compute the bond-orientational
|
||||
order parameters and is used to avoid running out of memory. For example
|
||||
if there are 4000 atoms in the simulation and the *chunksize*
|
||||
is set to 2000, the parameter calculation will be broken up
|
||||
if there are 32768 atoms in the simulation and the *chunksize*
|
||||
is set to 16384, the parameter calculation will be broken up
|
||||
into two passes.
|
||||
|
||||
The value of :math:`Q_l` is set to zero for atoms not in the
|
||||
@ -193,7 +193,7 @@ Default
|
||||
|
||||
The option defaults are *cutoff* = pair style cutoff, *nnn* = 12,
|
||||
*degrees* = 5 4 6 8 10 12 i.e. :math:`Q_4`, :math:`Q_6`, :math:`Q_8`, :math:`Q_{10}`, and :math:`Q_{12}`,
|
||||
*wl* = no, *wl/hat* = no, *components* off, and *chunksize* = 2000
|
||||
*wl* = no, *wl/hat* = no, *components* off, and *chunksize* = 16384
|
||||
|
||||
----------
|
||||
|
||||
|
||||
@ -152,7 +152,7 @@ The default values for these keywords are
|
||||
* *chemflag* = 0
|
||||
* *bnormflag* = 0
|
||||
* *wselfallflag* = 0
|
||||
* *chunksize* = 2000
|
||||
* *chunksize* = 4096
|
||||
|
||||
If *quadraticflag* is set to 1, then the SNAP energy expression includes additional quadratic terms
|
||||
that have been shown to increase the overall accuracy of the potential without much increase
|
||||
@ -189,8 +189,8 @@ pair style *snap* with the KOKKOS package and is ignored otherwise.
|
||||
This keyword controls
|
||||
the number of atoms in each pass used to compute the bispectrum
|
||||
components and is used to avoid running out of memory. For example
|
||||
if there are 4000 atoms in the simulation and the *chunksize*
|
||||
is set to 2000, the bispectrum calculation will be broken up
|
||||
if there are 8192 atoms in the simulation and the *chunksize*
|
||||
is set to 4096, the bispectrum calculation will be broken up
|
||||
into two passes.
|
||||
|
||||
Detailed definitions for all the other keywords
|
||||
|
||||
@ -1076,20 +1076,34 @@ struct params_lj_coul {
|
||||
|
||||
// Pair SNAP
|
||||
|
||||
// Compile-time configuration for the Kokkos SNAP pair style. These macros are
// the template arguments that the PairSNAPKokkosDevice / PairSNAPKokkosHost
// wrapper classes pass to PairSNAPKokkos<DeviceType, real_type_, vector_length_>.

// Floating-point type used as real_type_ by both wrappers.
#define SNAP_KOKKOS_REAL double
|
||||
// vector_length_ used by the PairSNAPKokkosHost wrapper.
#define SNAP_KOKKOS_HOST_VECLEN 1
|
||||
|
||||
// vector_length_ used by the PairSNAPKokkosDevice wrapper: 32 when
// LMP_KOKKOS_GPU is defined, 1 otherwise.
// NOTE(review): 32 presumably matches the GPU warp width -- confirm.
#ifdef LMP_KOKKOS_GPU
|
||||
#define SNAP_KOKKOS_DEVICE_VECLEN 32
|
||||
#else
|
||||
#define SNAP_KOKKOS_DEVICE_VECLEN 1
|
||||
#endif
|
||||
|
||||
|
||||
// intentional: SNAreal/complex gets reused beyond SNAP
|
||||
typedef double SNAreal;
|
||||
|
||||
//typedef struct { SNAreal re, im; } SNAcomplex;
|
||||
template <typename real>
|
||||
struct alignas(2*sizeof(real)) SNAComplex
|
||||
template <typename real_type_>
|
||||
struct alignas(2*sizeof(real_type_)) SNAComplex
|
||||
{
|
||||
real re,im;
|
||||
using real_type = real_type_;
|
||||
using complex = SNAComplex<real_type>;
|
||||
real_type re,im;
|
||||
|
||||
SNAComplex() = default;
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex()
|
||||
: re(static_cast<real_type>(0.)), im(static_cast<real_type>(0.)) { ; }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re)
|
||||
: re(re), im(static_cast<real>(0.)) { ; }
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re)
|
||||
: re(re), im(static_cast<real_type>(0.)) { ; }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re, real im)
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re, real_type im)
|
||||
: re(re), im(im) { ; }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other)
|
||||
@ -1117,27 +1131,24 @@ struct alignas(2*sizeof(real)) SNAComplex
|
||||
return *this;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static constexpr complex zero() { return complex(static_cast<real_type>(0.), static_cast<real_type>(0.)); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static constexpr complex one() { return complex(static_cast<real_type>(1.), static_cast<real_type>(0.)); }
|
||||
|
||||
// Returns the complex conjugate (re, -im) as a new value; *this is not
// modified. The member is const-qualified so it can also be invoked on const
// objects and through const references (e.g. `const complex&` parameters).
KOKKOS_INLINE_FUNCTION
const complex conj() const { return complex(re, -im); }
|
||||
|
||||
};
|
||||
|
||||
template <typename real>
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real> operator*(const real& r, const SNAComplex<real>& self) {
|
||||
return SNAComplex<real>(r*self.re, r*self.im);
|
||||
template <typename real_type>
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const real_type& r, const SNAComplex<real_type>& self) {
|
||||
return SNAComplex<real_type>(r*self.re, r*self.im);
|
||||
}
|
||||
|
||||
typedef SNAComplex<SNAreal> SNAcomplex;
|
||||
|
||||
// Cayley-Klein pack
|
||||
// Can guarantee it's aligned to 2 complex
|
||||
// Plain data bundle of per-neighbor Cayley-Klein quantities. With
// SNAreal = double, one SNAcomplex is 16 bytes, so alignas(32) aligns the
// struct to two complex values.
struct alignas(32) CayleyKleinPack {
|
||||
|
||||
// Cayley-Klein parameters `a` and `b`.
SNAcomplex a, b;
|
||||
// Three-component companions of a and b.
// NOTE(review): presumably the derivatives along x, y, z -- confirm.
SNAcomplex da[3], db[3];
|
||||
// NOTE(review): presumably the switching-function value (cf. compute_sfac) -- confirm.
SNAreal sfac;
|
||||
// NOTE(review): presumably the three directional derivative terms of sfac -- confirm.
SNAreal dsfacu[3];
|
||||
|
||||
};
|
||||
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CXX11)
|
||||
#undef ISFINITE
|
||||
#define ISFINITE(x) std::isfinite(x)
|
||||
|
||||
@ -15,9 +15,11 @@
|
||||
#include "pair_snap_kokkos_impl.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class PairSNAPKokkos<LMPDeviceType>;
|
||||
|
||||
template class PairSNAPKokkosDevice<LMPDeviceType>;
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
template class PairSNAPKokkos<LMPHostType>;
|
||||
template class PairSNAPKokkosHost<LMPHostType>;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -13,9 +13,13 @@
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(snap/kk,PairSNAPKokkos<LMPDeviceType>)
|
||||
PairStyle(snap/kk/device,PairSNAPKokkos<LMPDeviceType>)
|
||||
PairStyle(snap/kk/host,PairSNAPKokkos<LMPHostType>)
|
||||
PairStyle(snap/kk,PairSNAPKokkosDevice<LMPDeviceType>)
|
||||
PairStyle(snap/kk/device,PairSNAPKokkosDevice<LMPDeviceType>)
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
PairStyle(snap/kk/host,PairSNAPKokkosHost<LMPHostType>)
|
||||
#else
|
||||
PairStyle(snap/kk/host,PairSNAPKokkosDevice<LMPHostType>)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
@ -33,9 +37,11 @@ namespace LAMMPS_NS {
|
||||
// Routines for both the CPU and GPU backend
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
struct TagPairSNAPComputeForce{};
|
||||
struct TagPairSNAPComputeNeigh{};
|
||||
|
||||
|
||||
// GPU backend only
|
||||
struct TagPairSNAPComputeNeigh{};
|
||||
struct TagPairSNAPComputeCayleyKlein{};
|
||||
struct TagPairSNAPPreUi{};
|
||||
struct TagPairSNAPComputeUi{};
|
||||
struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
|
||||
@ -44,10 +50,10 @@ struct TagPairSNAPBeta{};
|
||||
struct TagPairSNAPComputeBi{};
|
||||
struct TagPairSNAPTransformBi{}; // re-order blist from AoSoA to AoS
|
||||
struct TagPairSNAPComputeYi{};
|
||||
struct TagPairSNAPTransformYi{}; // re-order ylist from AoSoA to AoS
|
||||
struct TagPairSNAPComputeFusedDeidrj{};
|
||||
|
||||
// CPU backend only
|
||||
struct TagPairSNAPComputeNeighCPU{};
|
||||
struct TagPairSNAPPreUiCPU{};
|
||||
struct TagPairSNAPComputeUiCPU{};
|
||||
struct TagPairSNAPTransformUiCPU{};
|
||||
@ -59,7 +65,7 @@ struct TagPairSNAPComputeYiCPU{};
|
||||
struct TagPairSNAPComputeDuidrjCPU{};
|
||||
struct TagPairSNAPComputeDeidrjCPU{};
|
||||
|
||||
template<class DeviceType>
|
||||
template<class DeviceType, typename real_type_, int vector_length_>
|
||||
class PairSNAPKokkos : public PairSNAP {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALF|HALFTHREAD};
|
||||
@ -68,6 +74,14 @@ public:
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
static constexpr int vector_length = vector_length_;
|
||||
using real_type = real_type_;
|
||||
using complex = SNAComplex<real_type>;
|
||||
|
||||
// type-dependent team sizes
|
||||
static constexpr int team_size_compute_ui = sizeof(real_type) == 4 ? 8 : 4;
|
||||
static constexpr int team_size_compute_fused_deidrj = sizeof(real_type) == 4 ? 4 : 2;
|
||||
|
||||
PairSNAPKokkos(class LAMMPS *);
|
||||
~PairSNAPKokkos();
|
||||
|
||||
@ -78,10 +92,10 @@ public:
|
||||
double memory_usage();
|
||||
|
||||
template<class TagStyle>
|
||||
void check_team_size_for(int, int&, int);
|
||||
void check_team_size_for(int, int&);
|
||||
|
||||
template<class TagStyle>
|
||||
void check_team_size_reduce(int, int&, int);
|
||||
void check_team_size_reduce(int, int&);
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -91,15 +105,18 @@ public:
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPBetaCPU,const int& ii) const;
|
||||
|
||||
// GPU backend only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi>::member_type& team) const;
|
||||
void operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeCayleyKlein, const int iatom_mod, const int jnbor, const int iatom_div) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPPreUi,const int iatom_mod, const int j, const int iatom_div) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi>::member_type& team) const;
|
||||
@ -122,13 +139,13 @@ public:
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYi,const int iatom_mod, const int idxz, const int iatom_div) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPTransformYi,const int iatom_mod, const int idxu, const int iatom_div) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeFusedDeidrj>::member_type& team) const;
|
||||
|
||||
// CPU backend only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeighCPU>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUiCPU>::member_type& team) const;
|
||||
|
||||
@ -173,7 +190,7 @@ protected:
|
||||
t_bvec bvec;
|
||||
typedef Kokkos::View<F_FLOAT***> t_dbvec;
|
||||
t_dbvec dbvec;
|
||||
SNAKokkos<DeviceType> snaKK;
|
||||
SNAKokkos<DeviceType, real_type, vector_length> snaKK;
|
||||
|
||||
int inum,max_neighs,chunk_size,chunk_offset;
|
||||
int host_flag;
|
||||
@ -208,14 +225,14 @@ inline double dist2(double* x,double* y);
|
||||
Kokkos::View<F_FLOAT****, Kokkos::LayoutRight, DeviceType> i_uarraytot_r, i_uarraytot_i;
|
||||
Kokkos::View<F_FLOAT******, Kokkos::LayoutRight, DeviceType> i_zarray_r, i_zarray_i;
|
||||
|
||||
Kokkos::View<F_FLOAT*, DeviceType> d_radelem; // element radii
|
||||
Kokkos::View<F_FLOAT*, DeviceType> d_wjelem; // elements weights
|
||||
Kokkos::View<F_FLOAT**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
|
||||
Kokkos::View<real_type*, DeviceType> d_radelem; // element radii
|
||||
Kokkos::View<real_type*, DeviceType> d_wjelem; // elements weights
|
||||
Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
|
||||
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
|
||||
Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list
|
||||
Kokkos::View<F_FLOAT**, DeviceType> d_beta; // betas for all atoms in list
|
||||
Kokkos::View<F_FLOAT***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
|
||||
Kokkos::View<F_FLOAT**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
|
||||
Kokkos::View<real_type**, DeviceType> d_beta; // betas for all atoms in list
|
||||
Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
|
||||
Kokkos::View<real_type**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
|
||||
|
||||
typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
|
||||
tdual_fparams k_cutsq;
|
||||
@ -237,6 +254,49 @@ inline double dist2(double* x,double* y);
|
||||
|
||||
};
|
||||
|
||||
|
||||
// These wrapper classes exist to make the pair style factory happy/avoid having
|
||||
// to extend the pair style factory to support Pair classes w/an arbitrary number
|
||||
// of extra template parameters
|
||||
|
||||
// Single-template-parameter wrapper around PairSNAPKokkos that fixes
// real_type_ to SNAP_KOKKOS_REAL and vector_length_ to
// SNAP_KOKKOS_DEVICE_VECLEN, so the pair style factory only has to deal with
// a class templated on DeviceType. Registered as snap/kk and snap/kk/device,
// and also as snap/kk/host when LMP_KOKKOS_GPU is not defined.
template <class DeviceType>
|
||||
class PairSNAPKokkosDevice : public PairSNAPKokkos<DeviceType, SNAP_KOKKOS_REAL, SNAP_KOKKOS_DEVICE_VECLEN> {
|
||||
|
||||
private:
|
||||
// Shorthand for the fully specified base class.
using Base = PairSNAPKokkos<DeviceType, SNAP_KOKKOS_REAL, SNAP_KOKKOS_DEVICE_VECLEN>;
|
||||
|
||||
public:
|
||||
|
||||
PairSNAPKokkosDevice(class LAMMPS *);
|
||||
|
||||
// The members below are only declared here; their definitions are not
// visible in this view.
// NOTE(review): presumably each one forwards to the same-named Base member -- confirm.
void coeff(int, char**);
|
||||
void init_style();
|
||||
double init_one(int, int);
|
||||
void compute(int, int);
|
||||
double memory_usage();
|
||||
|
||||
};
|
||||
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
// Single-template-parameter wrapper around PairSNAPKokkos that fixes
// real_type_ to SNAP_KOKKOS_REAL and vector_length_ to
// SNAP_KOKKOS_HOST_VECLEN. It is only declared when LMP_KOKKOS_GPU is
// defined, where it is registered as snap/kk/host.
template <class DeviceType>
|
||||
class PairSNAPKokkosHost : public PairSNAPKokkos<DeviceType, SNAP_KOKKOS_REAL, SNAP_KOKKOS_HOST_VECLEN> {
|
||||
|
||||
private:
|
||||
// Shorthand for the fully specified base class.
using Base = PairSNAPKokkos<DeviceType, SNAP_KOKKOS_REAL, SNAP_KOKKOS_HOST_VECLEN>;
|
||||
|
||||
public:
|
||||
|
||||
PairSNAPKokkosHost(class LAMMPS *);
|
||||
|
||||
// The members below are only declared here; their definitions are not
// visible in this view.
// NOTE(review): presumably each one forwards to the same-named Base member -- confirm.
void coeff(int, char**);
|
||||
void init_style();
|
||||
double init_one(int, int);
|
||||
void compute(int, int);
|
||||
double memory_usage();
|
||||
|
||||
};
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -25,45 +25,78 @@
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
template<typename real_type_, int vector_length_>
|
||||
struct WignerWrapper {
|
||||
using real_type = real_type_;
|
||||
using complex = SNAComplex<real_type>;
|
||||
static constexpr int vector_length = vector_length_;
|
||||
|
||||
const int offset; // my offset into the vector (0, ..., vector_length - 1)
|
||||
real_type* buffer; // buffer of real numbers
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
WignerWrapper(complex* buffer_, const int offset_)
|
||||
: offset(offset_), buffer(reinterpret_cast<real_type*>(buffer_))
|
||||
{ ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
complex get(const int& ma) {
|
||||
return complex(buffer[offset + 2 * vector_length * ma], buffer[offset + vector_length + 2 * vector_length * ma]);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void set(const int& ma, const complex& store) {
|
||||
buffer[offset + 2 * vector_length * ma] = store.re;
|
||||
buffer[offset + vector_length + 2 * vector_length * ma] = store.im;
|
||||
}
|
||||
};
|
||||
|
||||
// Element type of the idxu_full_half view in SNAKokkos: an index paired with a
// sign/conjugation flag.
// NOTE(review): presumably maps an index in the full idxu layout onto the
// stored half layout -- confirm against the code that fills idxu_full_half.
// NOTE(review): alignas(8) presumably lets both ints move as one 8-byte unit -- confirm.
struct alignas(8) FullHalfMapper {
|
||||
// NOTE(review): presumably the index into the half-sized idxu arrays -- confirm.
int idxu_half;
|
||||
int flip_sign; // 0 -> isn't flipped, 1 -> conj, -1 -> -conj
|
||||
};
|
||||
|
||||
template<class DeviceType, typename real_type_, int vector_length_>
|
||||
class SNAKokkos {
|
||||
|
||||
public:
|
||||
using real_type = real_type_;
|
||||
using complex = SNAComplex<real_type>;
|
||||
static constexpr int vector_length = vector_length_;
|
||||
|
||||
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
|
||||
typedef Kokkos::View<double*, DeviceType> t_sna_1d;
|
||||
typedef Kokkos::View<double*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
|
||||
typedef Kokkos::View<real_type*, DeviceType> t_sna_1d;
|
||||
typedef Kokkos::View<real_type*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
|
||||
typedef Kokkos::View<int**, DeviceType> t_sna_2i;
|
||||
typedef Kokkos::View<double**, DeviceType> t_sna_2d;
|
||||
typedef Kokkos::View<double**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
|
||||
typedef Kokkos::View<double***, DeviceType> t_sna_3d;
|
||||
typedef Kokkos::View<double***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
|
||||
typedef Kokkos::View<double***[3], DeviceType> t_sna_4d;
|
||||
typedef Kokkos::View<double****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll;
|
||||
typedef Kokkos::View<double**[3], DeviceType> t_sna_3d3;
|
||||
typedef Kokkos::View<double*****, DeviceType> t_sna_5d;
|
||||
typedef Kokkos::View<real_type**, DeviceType> t_sna_2d;
|
||||
typedef Kokkos::View<real_type**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
|
||||
typedef Kokkos::View<real_type***, DeviceType> t_sna_3d;
|
||||
typedef Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
|
||||
typedef Kokkos::View<real_type***[3], DeviceType> t_sna_4d;
|
||||
typedef Kokkos::View<real_type****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll;
|
||||
typedef Kokkos::View<real_type**[3], DeviceType> t_sna_3d3;
|
||||
typedef Kokkos::View<real_type*****, DeviceType> t_sna_5d;
|
||||
|
||||
typedef Kokkos::View<SNAcomplex*, DeviceType> t_sna_1c;
|
||||
typedef Kokkos::View<SNAcomplex*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic;
|
||||
typedef Kokkos::View<SNAcomplex**, DeviceType> t_sna_2c;
|
||||
typedef Kokkos::View<SNAcomplex**, Kokkos::LayoutLeft, DeviceType> t_sna_2c_ll;
|
||||
typedef Kokkos::View<SNAcomplex**, Kokkos::LayoutRight, DeviceType> t_sna_2c_lr;
|
||||
typedef Kokkos::View<SNAcomplex***, DeviceType> t_sna_3c;
|
||||
typedef Kokkos::View<SNAcomplex***, Kokkos::LayoutLeft, DeviceType> t_sna_3c_ll;
|
||||
typedef Kokkos::View<SNAcomplex***[3], DeviceType> t_sna_4c;
|
||||
typedef Kokkos::View<SNAcomplex***[3], Kokkos::LayoutLeft, DeviceType> t_sna_4c3_ll;
|
||||
typedef Kokkos::View<SNAcomplex****, Kokkos::LayoutLeft, DeviceType> t_sna_4c_ll;
|
||||
typedef Kokkos::View<SNAcomplex**[3], DeviceType> t_sna_3c3;
|
||||
typedef Kokkos::View<SNAcomplex*****, DeviceType> t_sna_5c;
|
||||
|
||||
typedef Kokkos::View<CayleyKleinPack**, DeviceType> t_sna_2ckp;
|
||||
typedef Kokkos::View<complex*, DeviceType> t_sna_1c;
|
||||
typedef Kokkos::View<complex*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic;
|
||||
typedef Kokkos::View<complex**, DeviceType> t_sna_2c;
|
||||
typedef Kokkos::View<complex**, Kokkos::LayoutLeft, DeviceType> t_sna_2c_ll;
|
||||
typedef Kokkos::View<complex**, Kokkos::LayoutRight, DeviceType> t_sna_2c_lr;
|
||||
typedef Kokkos::View<complex***, DeviceType> t_sna_3c;
|
||||
typedef Kokkos::View<complex***, Kokkos::LayoutLeft, DeviceType> t_sna_3c_ll;
|
||||
typedef Kokkos::View<complex***[3], DeviceType> t_sna_4c;
|
||||
typedef Kokkos::View<complex***[3], Kokkos::LayoutLeft, DeviceType> t_sna_4c3_ll;
|
||||
typedef Kokkos::View<complex****, Kokkos::LayoutLeft, DeviceType> t_sna_4c_ll;
|
||||
typedef Kokkos::View<complex**[3], DeviceType> t_sna_3c3;
|
||||
typedef Kokkos::View<complex*****, DeviceType> t_sna_5c;
|
||||
|
||||
inline
|
||||
SNAKokkos() {};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SNAKokkos(const SNAKokkos<DeviceType>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);
|
||||
SNAKokkos(const SNAKokkos<DeviceType,real_type,vector_length>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);
|
||||
|
||||
inline
|
||||
SNAKokkos(double, int, double, int, int, int, int, int, int);
|
||||
SNAKokkos(real_type, int, real_type, int, int, int, int, int, int);
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~SNAKokkos();
|
||||
@ -81,17 +114,16 @@ inline
|
||||
|
||||
// functions for bispectrum coefficients, GPU only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_cayley_klein(const int&, const int&, const double&, const double&,
|
||||
const double&, const double&, const double&);
|
||||
void compute_cayley_klein(const int&, const int&, const int&);
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void pre_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&); // ForceSNAP
|
||||
void pre_ui(const int&, const int&, const int&, const int&); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int); // ForceSNAP
|
||||
void compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_zi(const int&, const int&, const int&); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi(int,int,int,
|
||||
const Kokkos::View<F_FLOAT***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi(const int&, const int&, const int&); // ForceSNAP
|
||||
|
||||
@ -104,34 +136,33 @@ inline
|
||||
void compute_zi_cpu(const int&); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi_cpu(int,
|
||||
const Kokkos::View<F_FLOAT**, DeviceType> &beta); // ForceSNAP
|
||||
const Kokkos::View<real_type**, DeviceType> &beta); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||
|
||||
// functions for derivatives, GPU only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_fused_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int); //ForceSNAP
|
||||
void compute_fused_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); //ForceSNAP
|
||||
|
||||
// functions for derivatives, CPU only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); //ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double compute_sfac(double, double); // add_uarraytot, compute_duarray
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double compute_dsfac(double, double); // compute_duarray
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_s_dsfac(const double, const double, double&, double&); // compute_cayley_klein
|
||||
|
||||
// efficient complex FMA
|
||||
// efficient caxpy (i.e., y += a x)
|
||||
static KOKKOS_FORCEINLINE_FUNCTION
|
||||
void caxpy(const SNAcomplex& a, const SNAcomplex& x, SNAcomplex& y);
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
real_type compute_sfac(real_type, real_type); // add_uarraytot, compute_duarray
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
real_type compute_dsfac(real_type, real_type); // compute_duarray
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_s_dsfac(const real_type, const real_type, real_type&, real_type&); // compute_cayley_klein
|
||||
|
||||
// efficient complex FMA, conjugate of scalar
|
||||
static KOKKOS_FORCEINLINE_FUNCTION
|
||||
void caconjxpy(const SNAcomplex& a, const SNAcomplex& x, SNAcomplex& y);
|
||||
void sincos_wrapper(double x, double* sin_, double *cos_) { sincos(x, sin_, cos_); }
|
||||
static KOKKOS_FORCEINLINE_FUNCTION
|
||||
void sincos_wrapper(float x, float* sin_, float *cos_) { sincosf(x, sin_, cos_); }
|
||||
|
||||
// Set the direction for split ComputeDuidrj
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -146,10 +177,6 @@ inline
|
||||
|
||||
//per sna class instance for OMP use
|
||||
|
||||
// Alternative to rij, wj, rcutij...
|
||||
// just calculate everything up front
|
||||
t_sna_2ckp cayleyklein;
|
||||
|
||||
// Per InFlight Particle
|
||||
t_sna_3d rij;
|
||||
t_sna_2i inside;
|
||||
@ -175,8 +202,14 @@ inline
|
||||
t_sna_4c3_ll dulist;
|
||||
|
||||
// Modified structures for GPU backend
|
||||
t_sna_3d_ll ulisttot_re; // split real,
|
||||
t_sna_3d_ll ulisttot_im; // imag
|
||||
t_sna_3c_ll a_pack; // Cayley-Klein `a`
|
||||
t_sna_3c_ll b_pack; // `b`
|
||||
t_sna_4c_ll da_pack; // `da`
|
||||
t_sna_4c_ll db_pack; // `db`
|
||||
t_sna_4d_ll sfac_pack; // sfac, dsfac_{x,y,z}
|
||||
|
||||
t_sna_4d_ll ulisttot_re_pack; // split real,
|
||||
t_sna_4d_ll ulisttot_im_pack; // imag, AoSoA, flattened
|
||||
t_sna_4c_ll ulisttot_pack; // AoSoA layout
|
||||
t_sna_4c_ll zlist_pack; // AoSoA layout
|
||||
t_sna_4d_ll blist_pack;
|
||||
@ -191,7 +224,7 @@ inline
|
||||
int ntriples;
|
||||
|
||||
private:
|
||||
double rmin0, rfac0;
|
||||
real_type rmin0, rfac0;
|
||||
|
||||
//use indexlist instead of loops, constructor generates these
|
||||
// Same across all SNAKokkos
|
||||
@ -203,6 +236,7 @@ public:
|
||||
Kokkos::View<int*, DeviceType> idxu_block;
|
||||
Kokkos::View<int*, DeviceType> idxu_half_block;
|
||||
Kokkos::View<int*, DeviceType> idxu_cache_block;
|
||||
Kokkos::View<FullHalfMapper*, DeviceType> idxu_full_half;
|
||||
|
||||
private:
|
||||
Kokkos::View<int***, DeviceType> idxz_block;
|
||||
@ -231,12 +265,12 @@ inline
|
||||
void init_rootpqarray(); // init()
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, double, double, double, int); // compute_ui
|
||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real_type&, const real_type&, const real_type&, int); // compute_ui
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||
double, double, double,
|
||||
double, double); // compute_ui_cpu
|
||||
const real_type&, const real_type&, const real_type&,
|
||||
const real_type&, const real_type&); // compute_ui_cpu
|
||||
|
||||
|
||||
inline
|
||||
@ -246,8 +280,8 @@ inline
|
||||
int compute_ncoeff(); // SNAKokkos()
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||
double, double, double, // compute_duidrj_cpu
|
||||
double, double, double, double, double);
|
||||
const real_type&, const real_type&, const real_type&, // compute_duidrj_cpu
|
||||
const real_type&, const real_type&, const real_type&, const real_type&, const real_type&);
|
||||
|
||||
// Sets the style for the switching function
|
||||
// 0 = none
|
||||
@ -259,11 +293,11 @@ inline
|
||||
int bnorm_flag;
|
||||
|
||||
// Self-weight
|
||||
double wself;
|
||||
real_type wself;
|
||||
int wselfall_flag;
|
||||
|
||||
int bzero_flag; // 1 if bzero subtracted from barray
|
||||
Kokkos::View<double*, DeviceType> bzero; // array of B values for isolated atoms
|
||||
Kokkos::View<real_type*, DeviceType> bzero; // array of B values for isolated atoms
|
||||
|
||||
// for per-direction dulist calculation, specify the direction.
|
||||
int dir;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -657,7 +657,7 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename)
|
||||
chemflag = 0;
|
||||
bnormflag = 0;
|
||||
wselfallflag = 0;
|
||||
chunksize = 2000;
|
||||
chunksize = 4096;
|
||||
|
||||
// open SNAP parameter file on proc 0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user