Merge pull request #2549 from weinbe2/fea-snap-ui-aosoa

Kokkos SNAP optimizations --- Recursive polynomial evaluation optimizations, improved parallelism, various C++11-isms
This commit is contained in:
Axel Kohlmeyer
2021-01-05 15:17:26 -05:00
committed by GitHub
9 changed files with 1356 additions and 1050 deletions

View File

@ -115,8 +115,8 @@ The optional keyword *chunksize* is only applicable when using the
KOKKOS package and is ignored otherwise. This keyword controls
the number of atoms in each pass used to compute the bond-orientational
order parameters and is used to avoid running out of memory. For example
if there are 4000 atoms in the simulation and the *chunksize*
is set to 2000, the parameter calculation will be broken up
if there are 32768 atoms in the simulation and the *chunksize*
is set to 16384, the parameter calculation will be broken up
into two passes.
The value of :math:`Q_l` is set to zero for atoms not in the
@ -193,7 +193,7 @@ Default
The option defaults are *cutoff* = pair style cutoff, *nnn* = 12,
*degrees* = 5 4 6 8 10 12 i.e. :math:`Q_4`, :math:`Q_6`, :math:`Q_8`, :math:`Q_{10}`, and :math:`Q_{12}`,
*wl* = no, *wl/hat* = no, *components* off, and *chunksize* = 2000
*wl* = no, *wl/hat* = no, *components* off, and *chunksize* = 16384
----------

View File

@ -152,7 +152,7 @@ The default values for these keywords are
* *chemflag* = 0
* *bnormflag* = 0
* *wselfallflag* = 0
* *chunksize* = 2000
* *chunksize* = 4096
If *quadraticflag* is set to 1, then the SNAP energy expression includes additional quadratic terms
that have been shown to increase the overall accuracy of the potential without much increase
@ -189,8 +189,8 @@ pair style *snap* with the KOKKOS package and is ignored otherwise.
This keyword controls
the number of atoms in each pass used to compute the bispectrum
components and is used to avoid running out of memory. For example
if there are 4000 atoms in the simulation and the *chunksize*
is set to 2000, the bispectrum calculation will be broken up
if there are 8192 atoms in the simulation and the *chunksize*
is set to 4096, the bispectrum calculation will be broken up
into two passes.
Detailed definitions for all the other keywords

View File

@ -1076,20 +1076,34 @@ struct params_lj_coul {
// Pair SNAP
#define SNAP_KOKKOS_REAL double
#define SNAP_KOKKOS_HOST_VECLEN 1
#ifdef LMP_KOKKOS_GPU
#define SNAP_KOKKOS_DEVICE_VECLEN 32
#else
#define SNAP_KOKKOS_DEVICE_VECLEN 1
#endif
// intentional: SNAreal/complex gets reused beyond SNAP
typedef double SNAreal;
//typedef struct { SNAreal re, im; } SNAcomplex;
template <typename real>
struct alignas(2*sizeof(real)) SNAComplex
template <typename real_type_>
struct alignas(2*sizeof(real_type_)) SNAComplex
{
real re,im;
using real_type = real_type_;
using complex = SNAComplex<real_type>;
real_type re,im;
SNAComplex() = default;
KOKKOS_FORCEINLINE_FUNCTION SNAComplex()
: re(static_cast<real_type>(0.)), im(static_cast<real_type>(0.)) { ; }
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re)
: re(re), im(static_cast<real>(0.)) { ; }
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re)
: re(re), im(static_cast<real_type>(0.)) { ; }
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re, real im)
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re, real_type im)
: re(re), im(im) { ; }
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other)
@ -1117,27 +1131,24 @@ struct alignas(2*sizeof(real)) SNAComplex
return *this;
}
// Additive identity: real and imaginary parts are both zero, cast to real_type.
KOKKOS_INLINE_FUNCTION
static constexpr complex zero() { return complex(static_cast<real_type>(0.), static_cast<real_type>(0.)); }
// Multiplicative identity: real part one, imaginary part zero.
KOKKOS_INLINE_FUNCTION
static constexpr complex one() { return complex(static_cast<real_type>(1.), static_cast<real_type>(0.)); }
// Complex conjugate: returns (re, -im) and leaves *this untouched.
// Const-qualified because it only reads re/im; without the qualifier it could
// not be called on a const object or through a const reference.
KOKKOS_INLINE_FUNCTION
const complex conj() const { return complex(re, -im); }
};
template <typename real>
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real> operator*(const real& r, const SNAComplex<real>& self) {
return SNAComplex<real>(r*self.re, r*self.im);
template <typename real_type>
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const real_type& r, const SNAComplex<real_type>& self) {
return SNAComplex<real_type>(r*self.re, r*self.im);
}
typedef SNAComplex<SNAreal> SNAcomplex;
// Cayley-Klein pack
// Can guarantee it's aligned to 2 complex
// Bundles the Cayley-Klein quantities into one 32-byte-aligned record so they
// can be stored and fetched together.
struct alignas(32) CayleyKleinPack {
// Cayley-Klein parameters `a` and `b`.
SNAcomplex a, b;
// Three-component companions of `a` and `b`
// (presumably derivatives along x, y, z -- TODO confirm against the code that fills them).
SNAcomplex da[3], db[3];
// Scalar factor (presumably the switching-function value -- TODO confirm).
SNAreal sfac;
// Three-component companion of sfac (presumably its x, y, z derivative terms -- TODO confirm).
SNAreal dsfacu[3];
};
#if defined(KOKKOS_ENABLE_CXX11)
#undef ISFINITE
#define ISFINITE(x) std::isfinite(x)

View File

@ -15,9 +15,11 @@
#include "pair_snap_kokkos_impl.h"
namespace LAMMPS_NS {
template class PairSNAPKokkos<LMPDeviceType>;
template class PairSNAPKokkosDevice<LMPDeviceType>;
#ifdef LMP_KOKKOS_GPU
template class PairSNAPKokkos<LMPHostType>;
template class PairSNAPKokkosHost<LMPHostType>;
#endif
}

View File

@ -13,9 +13,13 @@
#ifdef PAIR_CLASS
PairStyle(snap/kk,PairSNAPKokkos<LMPDeviceType>)
PairStyle(snap/kk/device,PairSNAPKokkos<LMPDeviceType>)
PairStyle(snap/kk/host,PairSNAPKokkos<LMPHostType>)
PairStyle(snap/kk,PairSNAPKokkosDevice<LMPDeviceType>)
PairStyle(snap/kk/device,PairSNAPKokkosDevice<LMPDeviceType>)
#ifdef LMP_KOKKOS_GPU
PairStyle(snap/kk/host,PairSNAPKokkosHost<LMPHostType>)
#else
PairStyle(snap/kk/host,PairSNAPKokkosDevice<LMPHostType>)
#endif
#else
@ -33,9 +37,11 @@ namespace LAMMPS_NS {
// Routines for both the CPU and GPU backend
template<int NEIGHFLAG, int EVFLAG>
struct TagPairSNAPComputeForce{};
struct TagPairSNAPComputeNeigh{};
// GPU backend only
struct TagPairSNAPComputeNeigh{};
struct TagPairSNAPComputeCayleyKlein{};
struct TagPairSNAPPreUi{};
struct TagPairSNAPComputeUi{};
struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
@ -44,10 +50,10 @@ struct TagPairSNAPBeta{};
struct TagPairSNAPComputeBi{};
struct TagPairSNAPTransformBi{}; // re-order blist from AoSoA to AoS
struct TagPairSNAPComputeYi{};
struct TagPairSNAPTransformYi{}; // re-order ylist from AoSoA to AoS
struct TagPairSNAPComputeFusedDeidrj{};
// CPU backend only
struct TagPairSNAPComputeNeighCPU{};
struct TagPairSNAPPreUiCPU{};
struct TagPairSNAPComputeUiCPU{};
struct TagPairSNAPTransformUiCPU{};
@ -59,7 +65,7 @@ struct TagPairSNAPComputeYiCPU{};
struct TagPairSNAPComputeDuidrjCPU{};
struct TagPairSNAPComputeDeidrjCPU{};
template<class DeviceType>
template<class DeviceType, typename real_type_, int vector_length_>
class PairSNAPKokkos : public PairSNAP {
public:
enum {EnabledNeighFlags=FULL|HALF|HALFTHREAD};
@ -68,6 +74,14 @@ public:
typedef ArrayTypes<DeviceType> AT;
typedef EV_FLOAT value_type;
static constexpr int vector_length = vector_length_;
using real_type = real_type_;
using complex = SNAComplex<real_type>;
// type-dependent team sizes
static constexpr int team_size_compute_ui = sizeof(real_type) == 4 ? 8 : 4;
static constexpr int team_size_compute_fused_deidrj = sizeof(real_type) == 4 ? 4 : 2;
PairSNAPKokkos(class LAMMPS *);
~PairSNAPKokkos();
@ -78,10 +92,10 @@ public:
double memory_usage();
template<class TagStyle>
void check_team_size_for(int, int&, int);
void check_team_size_for(int, int&);
template<class TagStyle>
void check_team_size_reduce(int, int&, int);
void check_team_size_reduce(int, int&);
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
@ -91,15 +105,18 @@ public:
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT&) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPBetaCPU,const int& ii) const;
// GPU backend only
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi>::member_type& team) const;
void operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeCayleyKlein, const int iatom_mod, const int jnbor, const int iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPPreUi,const int iatom_mod, const int j, const int iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi>::member_type& team) const;
@ -122,13 +139,13 @@ public:
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYi,const int iatom_mod, const int idxz, const int iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPTransformYi,const int iatom_mod, const int idxu, const int iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeFusedDeidrj>::member_type& team) const;
// CPU backend only
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeighCPU>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUiCPU>::member_type& team) const;
@ -173,7 +190,7 @@ protected:
t_bvec bvec;
typedef Kokkos::View<F_FLOAT***> t_dbvec;
t_dbvec dbvec;
SNAKokkos<DeviceType> snaKK;
SNAKokkos<DeviceType, real_type, vector_length> snaKK;
int inum,max_neighs,chunk_size,chunk_offset;
int host_flag;
@ -208,14 +225,14 @@ inline double dist2(double* x,double* y);
Kokkos::View<F_FLOAT****, Kokkos::LayoutRight, DeviceType> i_uarraytot_r, i_uarraytot_i;
Kokkos::View<F_FLOAT******, Kokkos::LayoutRight, DeviceType> i_zarray_r, i_zarray_i;
Kokkos::View<F_FLOAT*, DeviceType> d_radelem; // element radii
Kokkos::View<F_FLOAT*, DeviceType> d_wjelem; // elements weights
Kokkos::View<F_FLOAT**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
Kokkos::View<real_type*, DeviceType> d_radelem; // element radii
Kokkos::View<real_type*, DeviceType> d_wjelem; // elements weights
Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list
Kokkos::View<F_FLOAT**, DeviceType> d_beta; // betas for all atoms in list
Kokkos::View<F_FLOAT***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
Kokkos::View<F_FLOAT**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
Kokkos::View<real_type**, DeviceType> d_beta; // betas for all atoms in list
Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
Kokkos::View<real_type**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
tdual_fparams k_cutsq;
@ -237,6 +254,49 @@ inline double dist2(double* x,double* y);
};
// These wrapper classes exist to make the pair style factory happy/avoid having
// to extend the pair style factory to support Pair classes w/an arbitrary number
// of extra template parameters
// Single-template-parameter front end for PairSNAPKokkos: pins the real type to
// SNAP_KOKKOS_REAL and the vector length to SNAP_KOKKOS_DEVICE_VECLEN, leaving
// only DeviceType for the caller to choose.
template <class DeviceType>
class PairSNAPKokkosDevice : public PairSNAPKokkos<DeviceType, SNAP_KOKKOS_REAL, SNAP_KOKKOS_DEVICE_VECLEN> {
private:
// Shorthand for the fully specified base class.
using Base = PairSNAPKokkos<DeviceType, SNAP_KOKKOS_REAL, SNAP_KOKKOS_DEVICE_VECLEN>;
public:
PairSNAPKokkosDevice(class LAMMPS *);
// The members below are re-declared on the wrapper; their definitions are not
// visible here (presumably they forward to Base -- verify in the impl header).
void coeff(int, char**);
void init_style();
double init_one(int, int);
void compute(int, int);
double memory_usage();
};
#ifdef LMP_KOKKOS_GPU
// Single-template-parameter front end for PairSNAPKokkos: pins the real type to
// SNAP_KOKKOS_REAL and the vector length to SNAP_KOKKOS_HOST_VECLEN, leaving
// only DeviceType for the caller to choose. Declared only when LMP_KOKKOS_GPU
// is defined (see the enclosing #ifdef).
template <class DeviceType>
class PairSNAPKokkosHost : public PairSNAPKokkos<DeviceType, SNAP_KOKKOS_REAL, SNAP_KOKKOS_HOST_VECLEN> {
private:
// Shorthand for the fully specified base class.
using Base = PairSNAPKokkos<DeviceType, SNAP_KOKKOS_REAL, SNAP_KOKKOS_HOST_VECLEN>;
public:
PairSNAPKokkosHost(class LAMMPS *);
// The members below are re-declared on the wrapper; their definitions are not
// visible here (presumably they forward to Base -- verify in the impl header).
void coeff(int, char**);
void init_style();
double init_one(int, int);
void compute(int, int);
double memory_usage();
};
#endif
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -25,45 +25,78 @@
namespace LAMMPS_NS {
template<class DeviceType>
// Lane-wise accessor over a buffer of complex numbers stored in split form.
//
// The buffer is handed in as complex* but addressed as a flat array of
// real_type. For a given index `ma`, the addressing used below is:
//   real part      at  offset + 2 * vector_length * ma
//   imaginary part at  offset + 2 * vector_length * ma + vector_length
// i.e. each `ma` owns a run of vector_length real parts followed by a run of
// vector_length imaginary parts, and `offset` selects one slot within a run.
//
// Template parameters:
//   real_type_      scalar type of the real/imaginary parts
//   vector_length_  number of slots per run
template<typename real_type_, int vector_length_>
struct WignerWrapper {
  using real_type = real_type_;
  using complex = SNAComplex<real_type>;
  static constexpr int vector_length = vector_length_;

  const int offset; // my offset into the vector (0, ..., vector_length - 1)
  real_type* buffer; // buffer of real numbers

  // Wraps `buffer_` without copying; the caller keeps ownership of the storage.
  KOKKOS_INLINE_FUNCTION
  WignerWrapper(complex* buffer_, const int offset_)
    : offset(offset_), buffer(reinterpret_cast<real_type*>(buffer_))
  { ; }

  // Reads the complex value for index `ma` from this wrapper's slot.
  // Const-qualified: it only reads through `buffer`, so it is usable on a
  // const WignerWrapper as well.
  KOKKOS_INLINE_FUNCTION
  complex get(const int& ma) const {
    return complex(buffer[offset + 2 * vector_length * ma], buffer[offset + vector_length + 2 * vector_length * ma]);
  }

  // Writes `store` for index `ma` into this wrapper's slot (real part first,
  // imaginary part vector_length elements later).
  KOKKOS_INLINE_FUNCTION
  void set(const int& ma, const complex& store) {
    buffer[offset + 2 * vector_length * ma] = store.re;
    buffer[offset + vector_length + 2 * vector_length * ma] = store.im;
  }
};
// One entry of a full-to-half index lookup: which "half" index to read and how
// the value read there has to be transformed. alignas(8) forces 8-byte
// alignment of the pair of ints.
// NOTE(review): presumably maps a full-range idxu to its half-range counterpart;
// confirm against the code that fills idxu_full_half.
struct alignas(8) FullHalfMapper {
int idxu_half; // index on the "half" side of the mapping
int flip_sign; // 0 -> isn't flipped, 1 -> conj, -1 -> -conj
};
template<class DeviceType, typename real_type_, int vector_length_>
class SNAKokkos {
public:
using real_type = real_type_;
using complex = SNAComplex<real_type>;
static constexpr int vector_length = vector_length_;
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
typedef Kokkos::View<double*, DeviceType> t_sna_1d;
typedef Kokkos::View<double*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
typedef Kokkos::View<real_type*, DeviceType> t_sna_1d;
typedef Kokkos::View<real_type*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
typedef Kokkos::View<int**, DeviceType> t_sna_2i;
typedef Kokkos::View<double**, DeviceType> t_sna_2d;
typedef Kokkos::View<double**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
typedef Kokkos::View<double***, DeviceType> t_sna_3d;
typedef Kokkos::View<double***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
typedef Kokkos::View<double***[3], DeviceType> t_sna_4d;
typedef Kokkos::View<double****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll;
typedef Kokkos::View<double**[3], DeviceType> t_sna_3d3;
typedef Kokkos::View<double*****, DeviceType> t_sna_5d;
typedef Kokkos::View<real_type**, DeviceType> t_sna_2d;
typedef Kokkos::View<real_type**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
typedef Kokkos::View<real_type***, DeviceType> t_sna_3d;
typedef Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
typedef Kokkos::View<real_type***[3], DeviceType> t_sna_4d;
typedef Kokkos::View<real_type****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll;
typedef Kokkos::View<real_type**[3], DeviceType> t_sna_3d3;
typedef Kokkos::View<real_type*****, DeviceType> t_sna_5d;
typedef Kokkos::View<SNAcomplex*, DeviceType> t_sna_1c;
typedef Kokkos::View<SNAcomplex*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic;
typedef Kokkos::View<SNAcomplex**, DeviceType> t_sna_2c;
typedef Kokkos::View<SNAcomplex**, Kokkos::LayoutLeft, DeviceType> t_sna_2c_ll;
typedef Kokkos::View<SNAcomplex**, Kokkos::LayoutRight, DeviceType> t_sna_2c_lr;
typedef Kokkos::View<SNAcomplex***, DeviceType> t_sna_3c;
typedef Kokkos::View<SNAcomplex***, Kokkos::LayoutLeft, DeviceType> t_sna_3c_ll;
typedef Kokkos::View<SNAcomplex***[3], DeviceType> t_sna_4c;
typedef Kokkos::View<SNAcomplex***[3], Kokkos::LayoutLeft, DeviceType> t_sna_4c3_ll;
typedef Kokkos::View<SNAcomplex****, Kokkos::LayoutLeft, DeviceType> t_sna_4c_ll;
typedef Kokkos::View<SNAcomplex**[3], DeviceType> t_sna_3c3;
typedef Kokkos::View<SNAcomplex*****, DeviceType> t_sna_5c;
typedef Kokkos::View<CayleyKleinPack**, DeviceType> t_sna_2ckp;
typedef Kokkos::View<complex*, DeviceType> t_sna_1c;
typedef Kokkos::View<complex*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic;
typedef Kokkos::View<complex**, DeviceType> t_sna_2c;
typedef Kokkos::View<complex**, Kokkos::LayoutLeft, DeviceType> t_sna_2c_ll;
typedef Kokkos::View<complex**, Kokkos::LayoutRight, DeviceType> t_sna_2c_lr;
typedef Kokkos::View<complex***, DeviceType> t_sna_3c;
typedef Kokkos::View<complex***, Kokkos::LayoutLeft, DeviceType> t_sna_3c_ll;
typedef Kokkos::View<complex***[3], DeviceType> t_sna_4c;
typedef Kokkos::View<complex***[3], Kokkos::LayoutLeft, DeviceType> t_sna_4c3_ll;
typedef Kokkos::View<complex****, Kokkos::LayoutLeft, DeviceType> t_sna_4c_ll;
typedef Kokkos::View<complex**[3], DeviceType> t_sna_3c3;
typedef Kokkos::View<complex*****, DeviceType> t_sna_5c;
inline
SNAKokkos() {};
KOKKOS_INLINE_FUNCTION
SNAKokkos(const SNAKokkos<DeviceType>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);
SNAKokkos(const SNAKokkos<DeviceType,real_type,vector_length>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);
inline
SNAKokkos(double, int, double, int, int, int, int, int, int);
SNAKokkos(real_type, int, real_type, int, int, int, int, int, int);
KOKKOS_INLINE_FUNCTION
~SNAKokkos();
@ -81,17 +114,16 @@ inline
// functions for bispectrum coefficients, GPU only
KOKKOS_INLINE_FUNCTION
void compute_cayley_klein(const int&, const int&, const double&, const double&,
const double&, const double&, const double&);
void compute_cayley_klein(const int&, const int&, const int&);
KOKKOS_INLINE_FUNCTION
void pre_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&); // ForceSNAP
void pre_ui(const int&, const int&, const int&, const int&); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int); // ForceSNAP
void compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_zi(const int&, const int&, const int&); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_yi(int,int,int,
const Kokkos::View<F_FLOAT***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_bi(const int&, const int&, const int&); // ForceSNAP
@ -104,34 +136,33 @@ inline
void compute_zi_cpu(const int&); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_yi_cpu(int,
const Kokkos::View<F_FLOAT**, DeviceType> &beta); // ForceSNAP
const Kokkos::View<real_type**, DeviceType> &beta); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
// functions for derivatives, GPU only
KOKKOS_INLINE_FUNCTION
void compute_fused_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int); //ForceSNAP
void compute_fused_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); //ForceSNAP
// functions for derivatives, CPU only
KOKKOS_INLINE_FUNCTION
void compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); //ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
KOKKOS_INLINE_FUNCTION
double compute_sfac(double, double); // add_uarraytot, compute_duarray
KOKKOS_INLINE_FUNCTION
double compute_dsfac(double, double); // compute_duarray
KOKKOS_INLINE_FUNCTION
void compute_s_dsfac(const double, const double, double&, double&); // compute_cayley_klein
// efficient complex FMA
// efficient caxpy (i.e., y += a x)
static KOKKOS_FORCEINLINE_FUNCTION
void caxpy(const SNAcomplex& a, const SNAcomplex& x, SNAcomplex& y);
KOKKOS_INLINE_FUNCTION
real_type compute_sfac(real_type, real_type); // add_uarraytot, compute_duarray
KOKKOS_INLINE_FUNCTION
real_type compute_dsfac(real_type, real_type); // compute_duarray
KOKKOS_INLINE_FUNCTION
void compute_s_dsfac(const real_type, const real_type, real_type&, real_type&); // compute_cayley_klein
// efficient complex FMA, conjugate of scalar
static KOKKOS_FORCEINLINE_FUNCTION
void caconjxpy(const SNAcomplex& a, const SNAcomplex& x, SNAcomplex& y);
void sincos_wrapper(double x, double* sin_, double *cos_) { sincos(x, sin_, cos_); }
static KOKKOS_FORCEINLINE_FUNCTION
void sincos_wrapper(float x, float* sin_, float *cos_) { sincosf(x, sin_, cos_); }
// Set the direction for split ComputeDuidrj
KOKKOS_INLINE_FUNCTION
@ -146,10 +177,6 @@ inline
//per sna class instance for OMP use
// Alternative to rij, wj, rcutij...
// just calculate everything up front
t_sna_2ckp cayleyklein;
// Per InFlight Particle
t_sna_3d rij;
t_sna_2i inside;
@ -175,8 +202,14 @@ inline
t_sna_4c3_ll dulist;
// Modified structures for GPU backend
t_sna_3d_ll ulisttot_re; // split real,
t_sna_3d_ll ulisttot_im; // imag
t_sna_3c_ll a_pack; // Cayley-Klein `a`
t_sna_3c_ll b_pack; // `b`
t_sna_4c_ll da_pack; // `da`
t_sna_4c_ll db_pack; // `db`
t_sna_4d_ll sfac_pack; // sfac, dsfac_{x,y,z}
t_sna_4d_ll ulisttot_re_pack; // split real,
t_sna_4d_ll ulisttot_im_pack; // imag, AoSoA, flattened
t_sna_4c_ll ulisttot_pack; // AoSoA layout
t_sna_4c_ll zlist_pack; // AoSoA layout
t_sna_4d_ll blist_pack;
@ -191,7 +224,7 @@ inline
int ntriples;
private:
double rmin0, rfac0;
real_type rmin0, rfac0;
//use indexlist instead of loops, constructor generates these
// Same across all SNAKokkos
@ -203,6 +236,7 @@ public:
Kokkos::View<int*, DeviceType> idxu_block;
Kokkos::View<int*, DeviceType> idxu_half_block;
Kokkos::View<int*, DeviceType> idxu_cache_block;
Kokkos::View<FullHalfMapper*, DeviceType> idxu_full_half;
private:
Kokkos::View<int***, DeviceType> idxz_block;
@ -231,12 +265,12 @@ inline
void init_rootpqarray(); // init()
KOKKOS_INLINE_FUNCTION
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, double, double, double, int); // compute_ui
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real_type&, const real_type&, const real_type&, int); // compute_ui
KOKKOS_INLINE_FUNCTION
void compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
double, double, double,
double, double); // compute_ui_cpu
const real_type&, const real_type&, const real_type&,
const real_type&, const real_type&); // compute_ui_cpu
inline
@ -246,8 +280,8 @@ inline
int compute_ncoeff(); // SNAKokkos()
KOKKOS_INLINE_FUNCTION
void compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
double, double, double, // compute_duidrj_cpu
double, double, double, double, double);
const real_type&, const real_type&, const real_type&, // compute_duidrj_cpu
const real_type&, const real_type&, const real_type&, const real_type&, const real_type&);
// Sets the style for the switching function
// 0 = none
@ -259,11 +293,11 @@ inline
int bnorm_flag;
// Self-weight
double wself;
real_type wself;
int wselfall_flag;
int bzero_flag; // 1 if bzero subtracted from barray
Kokkos::View<double*, DeviceType> bzero; // array of B values for isolated atoms
Kokkos::View<real_type*, DeviceType> bzero; // array of B values for isolated atoms
// for per-direction dulist calculation, specify the direction.
int dir;

File diff suppressed because it is too large Load Diff

View File

@ -657,7 +657,7 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename)
chemflag = 0;
bnormflag = 0;
wselfallflag = 0;
chunksize = 2000;
chunksize = 4096;
// open SNAP parameter file on proc 0