Switch 'real' to 'real_type'
This commit is contained in:
@ -1090,20 +1090,20 @@ struct params_lj_coul {
|
|||||||
typedef double SNAreal;
|
typedef double SNAreal;
|
||||||
|
|
||||||
//typedef struct { SNAreal re, im; } SNAcomplex;
|
//typedef struct { SNAreal re, im; } SNAcomplex;
|
||||||
template <typename real_type>
|
template <typename real_type_>
|
||||||
struct alignas(2*sizeof(real_type)) SNAComplex
|
struct alignas(2*sizeof(real_type_)) SNAComplex
|
||||||
{
|
{
|
||||||
using real = real_type;
|
using real_type = real_type_;
|
||||||
using complex = SNAComplex<real>;
|
using complex = SNAComplex<real_type>;
|
||||||
real re,im;
|
real_type re,im;
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex()
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex()
|
||||||
: re(static_cast<real>(0.)), im(static_cast<real>(0.)) { ; }
|
: re(static_cast<real_type>(0.)), im(static_cast<real_type>(0.)) { ; }
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re)
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re)
|
||||||
: re(re), im(static_cast<real>(0.)) { ; }
|
: re(re), im(static_cast<real_type>(0.)) { ; }
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re, real im)
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re, real_type im)
|
||||||
: re(re), im(im) { ; }
|
: re(re), im(im) { ; }
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other)
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other)
|
||||||
@ -1132,19 +1132,19 @@ struct alignas(2*sizeof(real_type)) SNAComplex
|
|||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
static constexpr complex zero() { return complex(static_cast<real>(0.), static_cast<real>(0.)); }
|
static constexpr complex zero() { return complex(static_cast<real_type>(0.), static_cast<real_type>(0.)); }
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
static constexpr complex one() { return complex(static_cast<real>(1.), static_cast<real>(0.)); }
|
static constexpr complex one() { return complex(static_cast<real_type>(1.), static_cast<real_type>(0.)); }
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
const complex conj() { return complex(re, -im); }
|
const complex conj() { return complex(re, -im); }
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename real>
|
template <typename real_type>
|
||||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real> operator*(const real& r, const SNAComplex<real>& self) {
|
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const real_type& r, const SNAComplex<real_type>& self) {
|
||||||
return SNAComplex<real>(r*self.re, r*self.im);
|
return SNAComplex<real_type>(r*self.re, r*self.im);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef SNAComplex<SNAreal> SNAcomplex;
|
typedef SNAComplex<SNAreal> SNAcomplex;
|
||||||
|
|||||||
@ -65,7 +65,7 @@ struct TagPairSNAPComputeYiCPU{};
|
|||||||
struct TagPairSNAPComputeDuidrjCPU{};
|
struct TagPairSNAPComputeDuidrjCPU{};
|
||||||
struct TagPairSNAPComputeDeidrjCPU{};
|
struct TagPairSNAPComputeDeidrjCPU{};
|
||||||
|
|
||||||
template<class DeviceType, typename real_type, int vector_length_>
|
template<class DeviceType, typename real_type_, int vector_length_>
|
||||||
class PairSNAPKokkos : public PairSNAP {
|
class PairSNAPKokkos : public PairSNAP {
|
||||||
public:
|
public:
|
||||||
enum {EnabledNeighFlags=FULL|HALF|HALFTHREAD};
|
enum {EnabledNeighFlags=FULL|HALF|HALFTHREAD};
|
||||||
@ -75,12 +75,12 @@ public:
|
|||||||
typedef EV_FLOAT value_type;
|
typedef EV_FLOAT value_type;
|
||||||
|
|
||||||
static constexpr int vector_length = vector_length_;
|
static constexpr int vector_length = vector_length_;
|
||||||
using real = real_type;
|
using real_type = real_type_;
|
||||||
using complex = SNAComplex<real>;
|
using complex = SNAComplex<real_type>;
|
||||||
|
|
||||||
// type-dependent team sizes
|
// type-dependent team sizes
|
||||||
static constexpr int team_size_compute_ui = sizeof(real) == 4 ? 8 : 4;
|
static constexpr int team_size_compute_ui = sizeof(real_type) == 4 ? 8 : 4;
|
||||||
static constexpr int team_size_compute_fused_deidrj = sizeof(real) == 4 ? 4 : 2;
|
static constexpr int team_size_compute_fused_deidrj = sizeof(real_type) == 4 ? 4 : 2;
|
||||||
|
|
||||||
PairSNAPKokkos(class LAMMPS *);
|
PairSNAPKokkos(class LAMMPS *);
|
||||||
~PairSNAPKokkos();
|
~PairSNAPKokkos();
|
||||||
@ -190,7 +190,7 @@ protected:
|
|||||||
t_bvec bvec;
|
t_bvec bvec;
|
||||||
typedef Kokkos::View<F_FLOAT***> t_dbvec;
|
typedef Kokkos::View<F_FLOAT***> t_dbvec;
|
||||||
t_dbvec dbvec;
|
t_dbvec dbvec;
|
||||||
SNAKokkos<DeviceType, real, vector_length> snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> snaKK;
|
||||||
|
|
||||||
int inum,max_neighs,chunk_size,chunk_offset;
|
int inum,max_neighs,chunk_size,chunk_offset;
|
||||||
int host_flag;
|
int host_flag;
|
||||||
@ -225,14 +225,14 @@ inline double dist2(double* x,double* y);
|
|||||||
Kokkos::View<F_FLOAT****, Kokkos::LayoutRight, DeviceType> i_uarraytot_r, i_uarraytot_i;
|
Kokkos::View<F_FLOAT****, Kokkos::LayoutRight, DeviceType> i_uarraytot_r, i_uarraytot_i;
|
||||||
Kokkos::View<F_FLOAT******, Kokkos::LayoutRight, DeviceType> i_zarray_r, i_zarray_i;
|
Kokkos::View<F_FLOAT******, Kokkos::LayoutRight, DeviceType> i_zarray_r, i_zarray_i;
|
||||||
|
|
||||||
Kokkos::View<real*, DeviceType> d_radelem; // element radii
|
Kokkos::View<real_type*, DeviceType> d_radelem; // element radii
|
||||||
Kokkos::View<real*, DeviceType> d_wjelem; // elements weights
|
Kokkos::View<real_type*, DeviceType> d_wjelem; // elements weights
|
||||||
Kokkos::View<real**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
|
Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
|
||||||
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
|
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
|
||||||
Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list
|
Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list
|
||||||
Kokkos::View<real**, DeviceType> d_beta; // betas for all atoms in list
|
Kokkos::View<real_type**, DeviceType> d_beta; // betas for all atoms in list
|
||||||
Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
|
Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
|
||||||
Kokkos::View<real**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
|
Kokkos::View<real_type**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
|
||||||
|
|
||||||
typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
|
typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
|
||||||
tdual_fparams k_cutsq;
|
tdual_fparams k_cutsq;
|
||||||
|
|||||||
@ -48,8 +48,8 @@ namespace LAMMPS_NS {
|
|||||||
//static double t7 = 0.0;
|
//static double t7 = 0.0;
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
PairSNAPKokkos<DeviceType, real, vector_length>::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp)
|
PairSNAPKokkos<DeviceType, real_type, vector_length>::PairSNAPKokkos(LAMMPS *lmp) : PairSNAP(lmp)
|
||||||
{
|
{
|
||||||
respa_enable = 0;
|
respa_enable = 0;
|
||||||
|
|
||||||
@ -67,8 +67,8 @@ PairSNAPKokkos<DeviceType, real, vector_length>::PairSNAPKokkos(LAMMPS *lmp) : P
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
PairSNAPKokkos<DeviceType, real, vector_length>::~PairSNAPKokkos()
|
PairSNAPKokkos<DeviceType, real_type, vector_length>::~PairSNAPKokkos()
|
||||||
{
|
{
|
||||||
if (copymode) return;
|
if (copymode) return;
|
||||||
|
|
||||||
@ -81,8 +81,8 @@ PairSNAPKokkos<DeviceType, real, vector_length>::~PairSNAPKokkos()
|
|||||||
init specific to this pair style
|
init specific to this pair style
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::init_style()
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::init_style()
|
||||||
{
|
{
|
||||||
if (force->newton_pair == 0)
|
if (force->newton_pair == 0)
|
||||||
error->all(FLERR,"Pair style SNAP requires newton pair on");
|
error->all(FLERR,"Pair style SNAP requires newton pair on");
|
||||||
@ -128,8 +128,8 @@ struct FindMaxNumNeighs {
|
|||||||
This version is a straightforward implementation
|
This version is a straightforward implementation
|
||||||
---------------------------------------------------------------------- */
|
---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::compute(int eflag_in, int vflag_in)
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::compute(int eflag_in, int vflag_in)
|
||||||
{
|
{
|
||||||
eflag = eflag_in;
|
eflag = eflag_in;
|
||||||
vflag = vflag_in;
|
vflag = vflag_in;
|
||||||
@ -192,9 +192,9 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::compute(int eflag_in, int
|
|||||||
|
|
||||||
if (beta_max < inum) {
|
if (beta_max < inum) {
|
||||||
beta_max = inum;
|
beta_max = inum;
|
||||||
d_beta = Kokkos::View<real**, DeviceType>("PairSNAPKokkos:beta",ncoeff,inum);
|
d_beta = Kokkos::View<real_type**, DeviceType>("PairSNAPKokkos:beta",ncoeff,inum);
|
||||||
if (!host_flag)
|
if (!host_flag)
|
||||||
d_beta_pack = Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType>("PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length);
|
d_beta_pack = Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType>("PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length);
|
||||||
d_ninside = Kokkos::View<int*, DeviceType>("PairSNAPKokkos:ninside",inum);
|
d_ninside = Kokkos::View<int*, DeviceType>("PairSNAPKokkos:ninside",inum);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -501,8 +501,8 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::compute(int eflag_in, int
|
|||||||
allocate all arrays
|
allocate all arrays
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::allocate()
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::allocate()
|
||||||
{
|
{
|
||||||
PairSNAP::allocate();
|
PairSNAP::allocate();
|
||||||
|
|
||||||
@ -515,8 +515,8 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::allocate()
|
|||||||
init for one type pair i,j and corresponding j,i
|
init for one type pair i,j and corresponding j,i
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
double PairSNAPKokkos<DeviceType, real, vector_length>::init_one(int i, int j)
|
double PairSNAPKokkos<DeviceType, real_type, vector_length>::init_one(int i, int j)
|
||||||
{
|
{
|
||||||
double cutone = PairSNAP::init_one(i,j);
|
double cutone = PairSNAP::init_one(i,j);
|
||||||
k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone;
|
k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone;
|
||||||
@ -529,16 +529,16 @@ double PairSNAPKokkos<DeviceType, real, vector_length>::init_one(int i, int j)
|
|||||||
set coeffs for one or more type pairs
|
set coeffs for one or more type pairs
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::coeff(int narg, char **arg)
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::coeff(int narg, char **arg)
|
||||||
{
|
{
|
||||||
PairSNAP::coeff(narg,arg);
|
PairSNAP::coeff(narg,arg);
|
||||||
|
|
||||||
// Set up element lists
|
// Set up element lists
|
||||||
|
|
||||||
d_radelem = Kokkos::View<real*, DeviceType>("pair:radelem",nelements);
|
d_radelem = Kokkos::View<real_type*, DeviceType>("pair:radelem",nelements);
|
||||||
d_wjelem = Kokkos::View<real*, DeviceType>("pair:wjelem",nelements);
|
d_wjelem = Kokkos::View<real_type*, DeviceType>("pair:wjelem",nelements);
|
||||||
d_coeffelem = Kokkos::View<real**, Kokkos::LayoutRight, DeviceType>("pair:coeffelem",nelements,ncoeffall);
|
d_coeffelem = Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType>("pair:coeffelem",nelements,ncoeffall);
|
||||||
|
|
||||||
auto h_radelem = Kokkos::create_mirror_view(d_radelem);
|
auto h_radelem = Kokkos::create_mirror_view(d_radelem);
|
||||||
auto h_wjelem = Kokkos::create_mirror_view(d_wjelem);
|
auto h_wjelem = Kokkos::create_mirror_view(d_wjelem);
|
||||||
@ -562,7 +562,7 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::coeff(int narg, char **arg
|
|||||||
Kokkos::deep_copy(d_coeffelem,h_coeffelem);
|
Kokkos::deep_copy(d_coeffelem,h_coeffelem);
|
||||||
Kokkos::deep_copy(d_map,h_map);
|
Kokkos::deep_copy(d_map,h_map);
|
||||||
|
|
||||||
snaKK = SNAKokkos<DeviceType, real, vector_length>(rfac0,twojmax,
|
snaKK = SNAKokkos<DeviceType, real_type, vector_length>(rfac0,twojmax,
|
||||||
rmin0,switchflag,bzeroflag,chemflag,bnormflag,wselfallflag,nelements);
|
rmin0,switchflag,bzeroflag,chemflag,bnormflag,wselfallflag,nelements);
|
||||||
snaKK.grow_rij(0,0);
|
snaKK.grow_rij(0,0);
|
||||||
snaKK.init();
|
snaKK.init();
|
||||||
@ -573,9 +573,9 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::coeff(int narg, char **arg
|
|||||||
of AoSoA data layouts and scratch memory for recursive polynomials
|
of AoSoA data layouts and scratch memory for recursive polynomials
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBeta,const int& ii) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPBeta,const int& ii) const {
|
||||||
|
|
||||||
if (ii >= chunk_size) return;
|
if (ii >= chunk_size) return;
|
||||||
|
|
||||||
@ -585,7 +585,7 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBet
|
|||||||
const int i = d_ilist[ii + chunk_offset];
|
const int i = d_ilist[ii + chunk_offset];
|
||||||
const int itype = type[i];
|
const int itype = type[i];
|
||||||
const int ielem = d_map[itype];
|
const int ielem = d_map[itype];
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL);
|
auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL);
|
||||||
|
|
||||||
@ -605,7 +605,7 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBet
|
|||||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||||
const auto jdxb = jcoeff % idxb_max;
|
const auto jdxb = jcoeff % idxb_max;
|
||||||
const auto jdx_chem = jcoeff / idxb_max;
|
const auto jdx_chem = jcoeff / idxb_max;
|
||||||
real bvecj = my_sna.blist(jdxb, jdx_chem, ii);
|
real_type bvecj = my_sna.blist(jdxb, jdx_chem, ii);
|
||||||
d_beta_pack(iatom_mod,icoeff,iatom_div) += d_coeffi[k]*bvecj;
|
d_beta_pack(iatom_mod,icoeff,iatom_div) += d_coeffi[k]*bvecj;
|
||||||
d_beta_pack(iatom_mod,jcoeff,iatom_div) += d_coeffi[k]*bveci;
|
d_beta_pack(iatom_mod,jcoeff,iatom_div) += d_coeffi[k]*bveci;
|
||||||
k++;
|
k++;
|
||||||
@ -614,11 +614,11 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBet
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeigh>::member_type& team) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeigh>::member_type& team) const {
|
||||||
|
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
// extract atom number
|
// extract atom number
|
||||||
int ii = team.team_rank() + team.league_rank() * team.team_size();
|
int ii = team.team_rank() + team.league_rank() * team.team_size();
|
||||||
@ -686,11 +686,11 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
const F_FLOAT dy = x(j,1) - ytmp;
|
const F_FLOAT dy = x(j,1) - ytmp;
|
||||||
const F_FLOAT dz = x(j,2) - ztmp;
|
const F_FLOAT dz = x(j,2) - ztmp;
|
||||||
const int elem_j = d_map[jtype];
|
const int elem_j = d_map[jtype];
|
||||||
my_sna.rij(ii,offset,0) = static_cast<real>(dx);
|
my_sna.rij(ii,offset,0) = static_cast<real_type>(dx);
|
||||||
my_sna.rij(ii,offset,1) = static_cast<real>(dy);
|
my_sna.rij(ii,offset,1) = static_cast<real_type>(dy);
|
||||||
my_sna.rij(ii,offset,2) = static_cast<real>(dz);
|
my_sna.rij(ii,offset,2) = static_cast<real_type>(dz);
|
||||||
my_sna.wj(ii,offset) = static_cast<real>(d_wjelem[elem_j]);
|
my_sna.wj(ii,offset) = static_cast<real_type>(d_wjelem[elem_j]);
|
||||||
my_sna.rcutij(ii,offset) = static_cast<real>((radi + d_radelem[elem_j])*rcutfac);
|
my_sna.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[elem_j])*rcutfac);
|
||||||
my_sna.inside(ii,offset) = j;
|
my_sna.inside(ii,offset) = j;
|
||||||
if (chemflag)
|
if (chemflag)
|
||||||
my_sna.element(ii,offset) = elem_j;
|
my_sna.element(ii,offset) = elem_j;
|
||||||
@ -702,10 +702,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
const int ii = iatom_mod + iatom_div * vector_length;
|
const int ii = iatom_mod + iatom_div * vector_length;
|
||||||
if (ii >= chunk_size) return;
|
if (ii >= chunk_size) return;
|
||||||
@ -716,10 +716,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
my_sna.compute_cayley_klein(iatom_mod,jnbor,iatom_div);
|
my_sna.compute_cayley_klein(iatom_mod,jnbor,iatom_div);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPPreUi, const int iatom_mod, const int j, const int iatom_div) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPPreUi, const int iatom_mod, const int j, const int iatom_div) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
const int ii = iatom_mod + iatom_div * vector_length;
|
const int ii = iatom_mod + iatom_div * vector_length;
|
||||||
if (ii >= chunk_size) return;
|
if (ii >= chunk_size) return;
|
||||||
@ -730,10 +730,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPPre
|
|||||||
my_sna.pre_ui(iatom_mod, j, ielem, iatom_div);
|
my_sna.pre_ui(iatom_mod, j, ielem, iatom_div);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUi>::member_type& team) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUi>::member_type& team) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
// extract flattened atom_div / neighbor number / bend location
|
// extract flattened atom_div / neighbor number / bend location
|
||||||
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui;
|
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui;
|
||||||
@ -757,10 +757,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||||
if (iatom >= chunk_size) return;
|
if (iatom >= chunk_size) return;
|
||||||
@ -791,10 +791,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTra
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeYi,const int iatom_mod, const int jjz, const int iatom_div) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeYi,const int iatom_mod, const int jjz, const int iatom_div) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||||
if (iatom >= chunk_size) return;
|
if (iatom >= chunk_size) return;
|
||||||
@ -804,10 +804,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
my_sna.compute_yi(iatom_mod,jjz,iatom_div,d_beta_pack);
|
my_sna.compute_yi(iatom_mod,jjz,iatom_div,d_beta_pack);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||||
if (iatom >= chunk_size) return;
|
if (iatom >= chunk_size) return;
|
||||||
@ -817,10 +817,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
my_sna.compute_zi(iatom_mod,jjz,iatom_div);
|
my_sna.compute_zi(iatom_mod,jjz,iatom_div);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||||
if (iatom >= chunk_size) return;
|
if (iatom >= chunk_size) return;
|
||||||
@ -830,10 +830,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
my_sna.compute_bi(iatom_mod,jjb,iatom_div);
|
my_sna.compute_bi(iatom_mod,jjb,iatom_div);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||||
if (iatom >= chunk_size) return;
|
if (iatom >= chunk_size) return;
|
||||||
@ -851,10 +851,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTra
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeFusedDeidrj>::member_type& team) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeFusedDeidrj>::member_type& team) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
// extract flattened atom_div / neighbor number / bend location
|
// extract flattened atom_div / neighbor number / bend location
|
||||||
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_fused_deidrj;
|
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_fused_deidrj;
|
||||||
@ -887,14 +887,14 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
different arithmetic intensity requirements for the CPU vs GPU.
|
different arithmetic intensity requirements for the CPU vs GPU.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBetaCPU,const int& ii) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPBetaCPU,const int& ii) const {
|
||||||
|
|
||||||
const int i = d_ilist[ii + chunk_offset];
|
const int i = d_ilist[ii + chunk_offset];
|
||||||
const int itype = type[i];
|
const int itype = type[i];
|
||||||
const int ielem = d_map[itype];
|
const int ielem = d_map[itype];
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL);
|
auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL);
|
||||||
|
|
||||||
@ -922,14 +922,14 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPBet
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeighCPU>::member_type& team) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeighCPU>::member_type& team) const {
|
||||||
|
|
||||||
|
|
||||||
int ii = team.league_rank();
|
int ii = team.league_rank();
|
||||||
const int i = d_ilist[ii + chunk_offset];
|
const int i = d_ilist[ii + chunk_offset];
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
const double xtmp = x(i,0);
|
const double xtmp = x(i,0);
|
||||||
const double ytmp = x(i,1);
|
const double ytmp = x(i,1);
|
||||||
const double ztmp = x(i,2);
|
const double ztmp = x(i,2);
|
||||||
@ -979,11 +979,11 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
|
|
||||||
if (rsq < rnd_cutsq(itype,jtype)) {
|
if (rsq < rnd_cutsq(itype,jtype)) {
|
||||||
if (final) {
|
if (final) {
|
||||||
my_sna.rij(ii,offset,0) = static_cast<real>(dx);
|
my_sna.rij(ii,offset,0) = static_cast<real_type>(dx);
|
||||||
my_sna.rij(ii,offset,1) = static_cast<real>(dy);
|
my_sna.rij(ii,offset,1) = static_cast<real_type>(dy);
|
||||||
my_sna.rij(ii,offset,2) = static_cast<real>(dz);
|
my_sna.rij(ii,offset,2) = static_cast<real_type>(dz);
|
||||||
my_sna.wj(ii,offset) = static_cast<real>(d_wjelem[elem_j]);
|
my_sna.wj(ii,offset) = static_cast<real_type>(d_wjelem[elem_j]);
|
||||||
my_sna.rcutij(ii,offset) = static_cast<real>((radi + d_radelem[elem_j])*rcutfac);
|
my_sna.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[elem_j])*rcutfac);
|
||||||
my_sna.inside(ii,offset) = j;
|
my_sna.inside(ii,offset) = j;
|
||||||
if (chemflag)
|
if (chemflag)
|
||||||
my_sna.element(ii,offset) = elem_j;
|
my_sna.element(ii,offset) = elem_j;
|
||||||
@ -996,10 +996,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUiCPU>::member_type& team) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUiCPU>::member_type& team) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
// Extract the atom number
|
// Extract the atom number
|
||||||
const int ii = team.team_rank() + team.team_size() * team.league_rank();
|
const int ii = team.team_rank() + team.team_size() * team.league_rank();
|
||||||
@ -1012,10 +1012,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPPre
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiCPU>::member_type& team) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiCPU>::member_type& team) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
// Extract the atom number
|
// Extract the atom number
|
||||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
|
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
|
||||||
@ -1029,10 +1029,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
my_sna.compute_ui_cpu(team,ii,jj);
|
my_sna.compute_ui_cpu(team,ii,jj);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
if (iatom >= chunk_size) return;
|
if (iatom >= chunk_size) return;
|
||||||
|
|
||||||
@ -1079,32 +1079,32 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPTra
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeYiCPU,const int& ii) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeYiCPU,const int& ii) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
my_sna.compute_yi_cpu(ii,d_beta);
|
my_sna.compute_yi_cpu(ii,d_beta);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeZiCPU,const int& ii) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeZiCPU,const int& ii) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
my_sna.compute_zi_cpu(ii);
|
my_sna.compute_zi_cpu(ii);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBiCPU>::member_type& team) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBiCPU>::member_type& team) const {
|
||||||
int ii = team.league_rank();
|
int ii = team.league_rank();
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
my_sna.compute_bi_cpu(team,ii);
|
my_sna.compute_bi_cpu(team,ii);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDuidrjCPU>::member_type& team) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDuidrjCPU>::member_type& team) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
// Extract the atom number
|
// Extract the atom number
|
||||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
|
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
|
||||||
@ -1118,10 +1118,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
my_sna.compute_duidrj_cpu(team,ii,jj);
|
my_sna.compute_duidrj_cpu(team,ii,jj);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDeidrjCPU>::member_type& team) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDeidrjCPU>::member_type& team) const {
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
|
|
||||||
// Extract the atom number
|
// Extract the atom number
|
||||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
|
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
|
||||||
@ -1141,10 +1141,10 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
likely not worth it.
|
likely not worth it.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
template<int NEIGHFLAG, int EVFLAG>
|
template<int NEIGHFLAG, int EVFLAG>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
|
||||||
|
|
||||||
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||||
|
|
||||||
@ -1153,7 +1153,7 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
|
|
||||||
int ii = team.league_rank();
|
int ii = team.league_rank();
|
||||||
const int i = d_ilist[ii + chunk_offset];
|
const int i = d_ilist[ii + chunk_offset];
|
||||||
SNAKokkos<DeviceType, real, vector_length> my_sna = snaKK;
|
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||||
const int ninside = d_ninside(ii);
|
const int ninside = d_ninside(ii);
|
||||||
|
|
||||||
Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside),
|
Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside),
|
||||||
@ -1242,20 +1242,20 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPCom
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
template<int NEIGHFLAG, int EVFLAG>
|
template<int NEIGHFLAG, int EVFLAG>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team) const {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG> >::member_type& team) const {
|
||||||
EV_FLOAT ev;
|
EV_FLOAT ev;
|
||||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>(), team, ev);
|
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>(), team, ev);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
template<int NEIGHFLAG>
|
template<int NEIGHFLAG>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
|
||||||
const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz,
|
const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz,
|
||||||
const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const
|
const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const
|
||||||
{
|
{
|
||||||
@ -1300,24 +1300,24 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::v_tally_xyz(EV_FLOAT &ev,
|
|||||||
memory usage
|
memory usage
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
double PairSNAPKokkos<DeviceType, real, vector_length>::memory_usage()
|
double PairSNAPKokkos<DeviceType, real_type, vector_length>::memory_usage()
|
||||||
{
|
{
|
||||||
double bytes = Pair::memory_usage();
|
double bytes = Pair::memory_usage();
|
||||||
int n = atom->ntypes+1;
|
int n = atom->ntypes+1;
|
||||||
bytes += n*n*sizeof(int);
|
bytes += n*n*sizeof(int);
|
||||||
bytes += n*n*sizeof(real);
|
bytes += n*n*sizeof(real_type);
|
||||||
bytes += (2*ncoeffall)*sizeof(real);
|
bytes += (2*ncoeffall)*sizeof(real_type);
|
||||||
bytes += (ncoeff*3)*sizeof(real);
|
bytes += (ncoeff*3)*sizeof(real_type);
|
||||||
bytes += snaKK.memory_usage();
|
bytes += snaKK.memory_usage();
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
template<class TagStyle>
|
template<class TagStyle>
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::check_team_size_for(int inum, int &team_size) {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::check_team_size_for(int inum, int &team_size) {
|
||||||
int team_size_max;
|
int team_size_max;
|
||||||
|
|
||||||
team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag());
|
team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag());
|
||||||
@ -1326,9 +1326,9 @@ void PairSNAPKokkos<DeviceType, real, vector_length>::check_team_size_for(int in
|
|||||||
team_size = team_size_max/vector_length;
|
team_size = team_size_max/vector_length;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
template<class TagStyle>
|
template<class TagStyle>
|
||||||
void PairSNAPKokkos<DeviceType, real, vector_length>::check_team_size_reduce(int inum, int &team_size) {
|
void PairSNAPKokkos<DeviceType, real_type, vector_length>::check_team_size_reduce(int inum, int &team_size) {
|
||||||
int team_size_max;
|
int team_size_max;
|
||||||
|
|
||||||
team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelReduceTag());
|
team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelReduceTag());
|
||||||
|
|||||||
@ -25,18 +25,18 @@
|
|||||||
|
|
||||||
namespace LAMMPS_NS {
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
template<typename real_type, int vector_length_>
|
template<typename real_type_, int vector_length_>
|
||||||
struct WignerWrapper {
|
struct WignerWrapper {
|
||||||
using real = real_type;
|
using real_type = real_type_;
|
||||||
using complex = SNAComplex<real>;
|
using complex = SNAComplex<real_type>;
|
||||||
static constexpr int vector_length = vector_length_;
|
static constexpr int vector_length = vector_length_;
|
||||||
|
|
||||||
const int offset; // my offset into the vector (0, ..., vector_length - 1)
|
const int offset; // my offset into the vector (0, ..., vector_length - 1)
|
||||||
real* buffer; // buffer of real numbers
|
real_type* buffer; // buffer of real numbers
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
WignerWrapper(complex* buffer_, const int offset_)
|
WignerWrapper(complex* buffer_, const int offset_)
|
||||||
: offset(offset_), buffer(reinterpret_cast<real*>(buffer_))
|
: offset(offset_), buffer(reinterpret_cast<real_type*>(buffer_))
|
||||||
{ ; }
|
{ ; }
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
@ -56,26 +56,26 @@ struct alignas(8) FullHalfMapper {
|
|||||||
int flip_sign; // 0 -> isn't flipped, 1 -> conj, -1 -> -conj
|
int flip_sign; // 0 -> isn't flipped, 1 -> conj, -1 -> -conj
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class DeviceType, typename real_type, int vector_length_>
|
template<class DeviceType, typename real_type_, int vector_length_>
|
||||||
class SNAKokkos {
|
class SNAKokkos {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
using real = real_type;
|
using real_type = real_type_;
|
||||||
using complex = SNAComplex<real>;
|
using complex = SNAComplex<real_type>;
|
||||||
static constexpr int vector_length = vector_length_;
|
static constexpr int vector_length = vector_length_;
|
||||||
|
|
||||||
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
|
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
|
||||||
typedef Kokkos::View<real*, DeviceType> t_sna_1d;
|
typedef Kokkos::View<real_type*, DeviceType> t_sna_1d;
|
||||||
typedef Kokkos::View<real*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
|
typedef Kokkos::View<real_type*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
|
||||||
typedef Kokkos::View<int**, DeviceType> t_sna_2i;
|
typedef Kokkos::View<int**, DeviceType> t_sna_2i;
|
||||||
typedef Kokkos::View<real**, DeviceType> t_sna_2d;
|
typedef Kokkos::View<real_type**, DeviceType> t_sna_2d;
|
||||||
typedef Kokkos::View<real**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
|
typedef Kokkos::View<real_type**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
|
||||||
typedef Kokkos::View<real***, DeviceType> t_sna_3d;
|
typedef Kokkos::View<real_type***, DeviceType> t_sna_3d;
|
||||||
typedef Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
|
typedef Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
|
||||||
typedef Kokkos::View<real***[3], DeviceType> t_sna_4d;
|
typedef Kokkos::View<real_type***[3], DeviceType> t_sna_4d;
|
||||||
typedef Kokkos::View<real****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll;
|
typedef Kokkos::View<real_type****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll;
|
||||||
typedef Kokkos::View<real**[3], DeviceType> t_sna_3d3;
|
typedef Kokkos::View<real_type**[3], DeviceType> t_sna_3d3;
|
||||||
typedef Kokkos::View<real*****, DeviceType> t_sna_5d;
|
typedef Kokkos::View<real_type*****, DeviceType> t_sna_5d;
|
||||||
|
|
||||||
typedef Kokkos::View<complex*, DeviceType> t_sna_1c;
|
typedef Kokkos::View<complex*, DeviceType> t_sna_1c;
|
||||||
typedef Kokkos::View<complex*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic;
|
typedef Kokkos::View<complex*, typename KKDevice<DeviceType>::value, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic;
|
||||||
@ -93,10 +93,10 @@ public:
|
|||||||
inline
|
inline
|
||||||
SNAKokkos() {};
|
SNAKokkos() {};
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
SNAKokkos(const SNAKokkos<DeviceType,real,vector_length>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);
|
SNAKokkos(const SNAKokkos<DeviceType,real_type,vector_length>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);
|
||||||
|
|
||||||
inline
|
inline
|
||||||
SNAKokkos(real, int, real, int, int, int, int, int, int);
|
SNAKokkos(real_type, int, real_type, int, int, int, int, int, int);
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
~SNAKokkos();
|
~SNAKokkos();
|
||||||
@ -123,7 +123,7 @@ inline
|
|||||||
void compute_zi(const int&, const int&, const int&); // ForceSNAP
|
void compute_zi(const int&, const int&, const int&); // ForceSNAP
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_yi(int,int,int,
|
void compute_yi(int,int,int,
|
||||||
const Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
|
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_bi(const int&, const int&, const int&); // ForceSNAP
|
void compute_bi(const int&, const int&, const int&); // ForceSNAP
|
||||||
|
|
||||||
@ -136,7 +136,7 @@ inline
|
|||||||
void compute_zi_cpu(const int&); // ForceSNAP
|
void compute_zi_cpu(const int&); // ForceSNAP
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_yi_cpu(int,
|
void compute_yi_cpu(int,
|
||||||
const Kokkos::View<real**, DeviceType> &beta); // ForceSNAP
|
const Kokkos::View<real_type**, DeviceType> &beta); // ForceSNAP
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||||
|
|
||||||
@ -151,13 +151,13 @@ inline
|
|||||||
void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
real compute_sfac(real, real); // add_uarraytot, compute_duarray
|
real_type compute_sfac(real_type, real_type); // add_uarraytot, compute_duarray
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
real compute_dsfac(real, real); // compute_duarray
|
real_type compute_dsfac(real_type, real_type); // compute_duarray
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_s_dsfac(const real, const real, real&, real&); // compute_cayley_klein
|
void compute_s_dsfac(const real_type, const real_type, real_type&, real_type&); // compute_cayley_klein
|
||||||
|
|
||||||
static KOKKOS_FORCEINLINE_FUNCTION
|
static KOKKOS_FORCEINLINE_FUNCTION
|
||||||
void sincos_wrapper(double x, double* sin_, double *cos_) { sincos(x, sin_, cos_); }
|
void sincos_wrapper(double x, double* sin_, double *cos_) { sincos(x, sin_, cos_); }
|
||||||
@ -224,7 +224,7 @@ inline
|
|||||||
int ntriples;
|
int ntriples;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
real rmin0, rfac0;
|
real_type rmin0, rfac0;
|
||||||
|
|
||||||
//use indexlist instead of loops, constructor generates these
|
//use indexlist instead of loops, constructor generates these
|
||||||
// Same across all SNAKokkos
|
// Same across all SNAKokkos
|
||||||
@ -265,12 +265,12 @@ inline
|
|||||||
void init_rootpqarray(); // init()
|
void init_rootpqarray(); // init()
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real&, const real&, const real&, int); // compute_ui
|
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real_type&, const real_type&, const real_type&, int); // compute_ui
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
void compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||||
const real&, const real&, const real&,
|
const real_type&, const real_type&, const real_type&,
|
||||||
const real&, const real&); // compute_ui_cpu
|
const real_type&, const real_type&); // compute_ui_cpu
|
||||||
|
|
||||||
|
|
||||||
inline
|
inline
|
||||||
@ -280,8 +280,8 @@ inline
|
|||||||
int compute_ncoeff(); // SNAKokkos()
|
int compute_ncoeff(); // SNAKokkos()
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
void compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||||
const real&, const real&, const real&, // compute_duidrj_cpu
|
const real_type&, const real_type&, const real_type&, // compute_duidrj_cpu
|
||||||
const real&, const real&, const real&, const real&, const real&);
|
const real_type&, const real_type&, const real_type&, const real_type&, const real_type&);
|
||||||
|
|
||||||
// Sets the style for the switching function
|
// Sets the style for the switching function
|
||||||
// 0 = none
|
// 0 = none
|
||||||
@ -293,11 +293,11 @@ inline
|
|||||||
int bnorm_flag;
|
int bnorm_flag;
|
||||||
|
|
||||||
// Self-weight
|
// Self-weight
|
||||||
real wself;
|
real_type wself;
|
||||||
int wselfall_flag;
|
int wselfall_flag;
|
||||||
|
|
||||||
int bzero_flag; // 1 if bzero subtracted from barray
|
int bzero_flag; // 1 if bzero subtracted from barray
|
||||||
Kokkos::View<real*, DeviceType> bzero; // array of B values for isolated atoms
|
Kokkos::View<real_type*, DeviceType> bzero; // array of B values for isolated atoms
|
||||||
|
|
||||||
// for per-direction dulist calculation, specify the direction.
|
// for per-direction dulist calculation, specify the direction.
|
||||||
int dir;
|
int dir;
|
||||||
|
|||||||
@ -25,16 +25,16 @@ namespace LAMMPS_NS {
|
|||||||
|
|
||||||
static const double MY_PI = 3.14159265358979323846; // pi
|
static const double MY_PI = 3.14159265358979323846; // pi
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
inline
|
inline
|
||||||
SNAKokkos<DeviceType, real, vector_length>::SNAKokkos(real rfac0_in,
|
SNAKokkos<DeviceType, real_type, vector_length>::SNAKokkos(real_type rfac0_in,
|
||||||
int twojmax_in, real rmin0_in, int switch_flag_in, int bzero_flag_in,
|
int twojmax_in, real_type rmin0_in, int switch_flag_in, int bzero_flag_in,
|
||||||
int chem_flag_in, int bnorm_flag_in, int wselfall_flag_in, int nelements_in)
|
int chem_flag_in, int bnorm_flag_in, int wselfall_flag_in, int nelements_in)
|
||||||
{
|
{
|
||||||
LAMMPS_NS::ExecutionSpace execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
LAMMPS_NS::ExecutionSpace execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||||
host_flag = (execution_space == LAMMPS_NS::Host);
|
host_flag = (execution_space == LAMMPS_NS::Host);
|
||||||
|
|
||||||
wself = static_cast<real>(1.0);
|
wself = static_cast<real_type>(1.0);
|
||||||
|
|
||||||
rfac0 = rfac0_in;
|
rfac0 = rfac0_in;
|
||||||
rmin0 = rmin0_in;
|
rmin0 = rmin0_in;
|
||||||
@ -63,7 +63,7 @@ SNAKokkos<DeviceType, real, vector_length>::SNAKokkos(real rfac0_in,
|
|||||||
cglist = t_sna_1d("SNAKokkos::cglist",idxcg_max);
|
cglist = t_sna_1d("SNAKokkos::cglist",idxcg_max);
|
||||||
|
|
||||||
if (bzero_flag) {
|
if (bzero_flag) {
|
||||||
bzero = Kokkos::View<real*, Kokkos::LayoutRight, DeviceType>("sna:bzero",twojmax+1);
|
bzero = Kokkos::View<real_type*, Kokkos::LayoutRight, DeviceType>("sna:bzero",twojmax+1);
|
||||||
auto h_bzero = Kokkos::create_mirror_view(bzero);
|
auto h_bzero = Kokkos::create_mirror_view(bzero);
|
||||||
|
|
||||||
double www = wself*wself*wself;
|
double www = wself*wself*wself;
|
||||||
@ -78,15 +78,15 @@ SNAKokkos<DeviceType, real, vector_length>::SNAKokkos(real rfac0_in,
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
SNAKokkos<DeviceType, real, vector_length>::~SNAKokkos()
|
SNAKokkos<DeviceType, real_type, vector_length>::~SNAKokkos()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
inline
|
inline
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::build_indexlist()
|
void SNAKokkos<DeviceType, real_type, vector_length>::build_indexlist()
|
||||||
{
|
{
|
||||||
// index list for cglist
|
// index list for cglist
|
||||||
|
|
||||||
@ -274,17 +274,17 @@ void SNAKokkos<DeviceType, real, vector_length>::build_indexlist()
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
inline
|
inline
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::init()
|
void SNAKokkos<DeviceType, real_type, vector_length>::init()
|
||||||
{
|
{
|
||||||
init_clebsch_gordan();
|
init_clebsch_gordan();
|
||||||
init_rootpqarray();
|
init_rootpqarray();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
inline
|
inline
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::grow_rij(int newnatom, int newnmax)
|
void SNAKokkos<DeviceType, real_type, vector_length>::grow_rij(int newnatom, int newnmax)
|
||||||
{
|
{
|
||||||
if(newnatom <= natom && newnmax <= nmax) return;
|
if(newnatom <= natom && newnmax <= nmax) return;
|
||||||
natom = newnatom;
|
natom = newnatom;
|
||||||
@ -358,9 +358,9 @@ void SNAKokkos<DeviceType, real, vector_length>::grow_rij(int newnatom, int newn
|
|||||||
ComputeFusedDeidrj, which are one warp per atom-neighbor pair.
|
ComputeFusedDeidrj, which are one warp per atom-neighbor pair.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_cayley_klein(const int& iatom_mod, const int& jnbor, const int& iatom_div)
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_cayley_klein(const int& iatom_mod, const int& jnbor, const int& iatom_div)
|
||||||
{
|
{
|
||||||
const int iatom = iatom_mod + vector_length * iatom_div;
|
const int iatom = iatom_mod + vector_length * iatom_div;
|
||||||
const auto x = rij(iatom,jnbor,0);
|
const auto x = rij(iatom,jnbor,0);
|
||||||
@ -369,25 +369,25 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_cayley_klein(const int&
|
|||||||
const auto rsq = x * x + y * y + z * z;
|
const auto rsq = x * x + y * y + z * z;
|
||||||
const auto r = sqrt(rsq);
|
const auto r = sqrt(rsq);
|
||||||
const auto rcut = rcutij(iatom, jnbor);
|
const auto rcut = rcutij(iatom, jnbor);
|
||||||
const auto rscale0 = rfac0 * static_cast<real>(MY_PI) / (rcut - rmin0);
|
const auto rscale0 = rfac0 * static_cast<real_type>(MY_PI) / (rcut - rmin0);
|
||||||
const auto theta0 = (r - rmin0) * rscale0;
|
const auto theta0 = (r - rmin0) * rscale0;
|
||||||
real sn, cs;
|
real_type sn, cs;
|
||||||
sincos_wrapper(theta0, &sn, &cs);
|
sincos_wrapper(theta0, &sn, &cs);
|
||||||
const real z0 = r * cs / sn;
|
const real_type z0 = r * cs / sn;
|
||||||
const real dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq;
|
const real_type dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq;
|
||||||
|
|
||||||
const auto wj_local = wj(iatom, jnbor);
|
const auto wj_local = wj(iatom, jnbor);
|
||||||
real sfac, dsfac;
|
real_type sfac, dsfac;
|
||||||
compute_s_dsfac(r, rcut, sfac, dsfac);
|
compute_s_dsfac(r, rcut, sfac, dsfac);
|
||||||
sfac *= wj_local;
|
sfac *= wj_local;
|
||||||
dsfac *= wj_local;
|
dsfac *= wj_local;
|
||||||
|
|
||||||
const auto rinv = static_cast<real>(1.0) / r;
|
const auto rinv = static_cast<real_type>(1.0) / r;
|
||||||
const auto ux = x * rinv;
|
const auto ux = x * rinv;
|
||||||
const auto uy = y * rinv;
|
const auto uy = y * rinv;
|
||||||
const auto uz = z * rinv;
|
const auto uz = z * rinv;
|
||||||
|
|
||||||
const auto r0inv = static_cast<real>(1.0) / sqrt(r * r + z0 * z0);
|
const auto r0inv = static_cast<real_type>(1.0) / sqrt(r * r + z0 * z0);
|
||||||
|
|
||||||
const complex a = { z0 * r0inv, -z * r0inv };
|
const complex a = { z0 * r0inv, -z * r0inv };
|
||||||
const complex b = { r0inv * y, -r0inv * x };
|
const complex b = { r0inv * y, -r0inv * x };
|
||||||
@ -433,9 +433,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_cayley_klein(const int&
|
|||||||
|
|
||||||
// we need to explicitly zero `dedr` somewhere before hitting
|
// we need to explicitly zero `dedr` somewhere before hitting
|
||||||
// ComputeFusedDeidrj --- this is just a convenient place to do it.
|
// ComputeFusedDeidrj --- this is just a convenient place to do it.
|
||||||
dedr(iatom_mod + vector_length * iatom_div, jnbor, 0) = static_cast<real>(0.);
|
dedr(iatom_mod + vector_length * iatom_div, jnbor, 0) = static_cast<real_type>(0.);
|
||||||
dedr(iatom_mod + vector_length * iatom_div, jnbor, 1) = static_cast<real>(0.);
|
dedr(iatom_mod + vector_length * iatom_div, jnbor, 1) = static_cast<real_type>(0.);
|
||||||
dedr(iatom_mod + vector_length * iatom_div, jnbor, 2) = static_cast<real>(0.);
|
dedr(iatom_mod + vector_length * iatom_div, jnbor, 2) = static_cast<real_type>(0.);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -445,9 +445,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_cayley_klein(const int&
|
|||||||
advantage of the symmetry of the Wigner U matrices.
|
advantage of the symmetry of the Wigner U matrices.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::pre_ui(const int& iatom_mod, const int& j, const int& ielem, const int& iatom_div)
|
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui(const int& iatom_mod, const int& j, const int& ielem, const int& iatom_div)
|
||||||
{
|
{
|
||||||
|
|
||||||
for (int jelem = 0; jelem < nelements; jelem++) {
|
for (int jelem = 0; jelem < nelements; jelem++) {
|
||||||
@ -459,11 +459,11 @@ void SNAKokkos<DeviceType, real, vector_length>::pre_ui(const int& iatom_mod, co
|
|||||||
for (int mb = 0; 2*mb <= j; mb++) {
|
for (int mb = 0; 2*mb <= j; mb++) {
|
||||||
for (int ma = 0; ma <= j; ma++) {
|
for (int ma = 0; ma <= j; ma++) {
|
||||||
|
|
||||||
real re_part = static_cast<real>(0.);
|
real_type re_part = static_cast<real_type>(0.);
|
||||||
if (ma == mb && (!chem_flag || ielem == jelem || wselfall_flag)) { re_part = wself; }
|
if (ma == mb && (!chem_flag || ielem == jelem || wselfall_flag)) { re_part = wself; }
|
||||||
|
|
||||||
ulisttot_re_pack(iatom_mod, jju_half, jelem, iatom_div) = re_part;
|
ulisttot_re_pack(iatom_mod, jju_half, jelem, iatom_div) = re_part;
|
||||||
ulisttot_im_pack(iatom_mod, jju_half, jelem, iatom_div) = static_cast<real>(0.);
|
ulisttot_im_pack(iatom_mod, jju_half, jelem, iatom_div) = static_cast<real_type>(0.);
|
||||||
|
|
||||||
jju_half++;
|
jju_half++;
|
||||||
}
|
}
|
||||||
@ -477,9 +477,9 @@ void SNAKokkos<DeviceType, real, vector_length>::pre_ui(const int& iatom_mod, co
|
|||||||
accumulating to the total. GPU only.
|
accumulating to the total. GPU only.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
|
||||||
{
|
{
|
||||||
|
|
||||||
// utot(j,ma,mb) = 0 for all j,ma,mb
|
// utot(j,ma,mb) = 0 for all j,ma,mb
|
||||||
@ -496,7 +496,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokko
|
|||||||
const int scratch_shift = team_rank * tile_size;
|
const int scratch_shift = team_rank * tile_size;
|
||||||
|
|
||||||
// extract and wrap
|
// extract and wrap
|
||||||
WignerWrapper<real, vector_length> ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
|
WignerWrapper<real_type, vector_length> ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
|
||||||
|
|
||||||
|
|
||||||
// load parameters
|
// load parameters
|
||||||
@ -532,7 +532,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokko
|
|||||||
const complex ulist_prev = ulist_wrapper.get(ma);
|
const complex ulist_prev = ulist_wrapper.get(ma);
|
||||||
|
|
||||||
// ulist_accum += rootpq * a.conj() * ulist_prev;
|
// ulist_accum += rootpq * a.conj() * ulist_prev;
|
||||||
real rootpq = rootpqarray(j - ma, j - mb);
|
real_type rootpq = rootpqarray(j - ma, j - mb);
|
||||||
ulist_accum.re += rootpq * (a.re * ulist_prev.re + a.im * ulist_prev.im);
|
ulist_accum.re += rootpq * (a.re * ulist_prev.re + a.im * ulist_prev.im);
|
||||||
ulist_accum.im += rootpq * (a.re * ulist_prev.im - a.im * ulist_prev.re);
|
ulist_accum.im += rootpq * (a.re * ulist_prev.im - a.im * ulist_prev.re);
|
||||||
|
|
||||||
@ -572,7 +572,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokko
|
|||||||
Kokkos::atomic_add(&(ulisttot_im_pack(iatom_mod, jjup + ma, jelem, iatom_div)), ulist_prev.im * sfac);
|
Kokkos::atomic_add(&(ulisttot_im_pack(iatom_mod, jjup + ma, jelem, iatom_div)), ulist_prev.im * sfac);
|
||||||
|
|
||||||
// ulist_accum += rootpq * b * ulist_prev;
|
// ulist_accum += rootpq * b * ulist_prev;
|
||||||
real rootpq = rootpqarray(j - ma, mb);
|
real_type rootpq = rootpqarray(j - ma, mb);
|
||||||
ulist_accum.re += rootpq * (b.re * ulist_prev.re - b.im * ulist_prev.im);
|
ulist_accum.re += rootpq * (b.re * ulist_prev.re - b.im * ulist_prev.im);
|
||||||
ulist_accum.im += rootpq * (b.re * ulist_prev.im + b.im * ulist_prev.re);
|
ulist_accum.im += rootpq * (b.re * ulist_prev.im + b.im * ulist_prev.re);
|
||||||
|
|
||||||
@ -614,9 +614,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui(const typename Kokko
|
|||||||
divergence. GPU version
|
divergence. GPU version
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_zi(const int& iatom_mod, const int& jjz, const int& iatom_div)
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi(const int& iatom_mod, const int& jjz, const int& iatom_div)
|
||||||
{
|
{
|
||||||
|
|
||||||
const int j1 = idxz(jjz, 0);
|
const int j1 = idxz(jjz, 0);
|
||||||
@ -629,7 +629,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_zi(const int& iatom_mod
|
|||||||
const int na = idxz(jjz, 7);
|
const int na = idxz(jjz, 7);
|
||||||
const int nb = idxz(jjz, 8);
|
const int nb = idxz(jjz, 8);
|
||||||
|
|
||||||
const real* cgblock = cglist.data() + idxcg_block(j1, j2, j);
|
const real_type* cgblock = cglist.data() + idxcg_block(j1, j2, j);
|
||||||
|
|
||||||
int idouble = 0;
|
int idouble = 0;
|
||||||
|
|
||||||
@ -688,9 +688,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_zi(const int& iatom_mod
|
|||||||
divergence.
|
divergence.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_bi(const int& iatom_mod, const int& jjb, const int& iatom_div)
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iatom_mod, const int& jjb, const int& iatom_div)
|
||||||
{
|
{
|
||||||
// for j1 = 0,...,twojmax
|
// for j1 = 0,...,twojmax
|
||||||
// for j2 = 0,twojmax
|
// for j2 = 0,twojmax
|
||||||
@ -751,10 +751,10 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi(const int& iatom_mod
|
|||||||
|
|
||||||
const auto utot = ulisttot_pack(iatom_mod, jju_index, elem3, iatom_div);
|
const auto utot = ulisttot_pack(iatom_mod, jju_index, elem3, iatom_div);
|
||||||
const auto zloc = zlist_pack(iatom_mod, jjz_index, idouble, iatom_div);
|
const auto zloc = zlist_pack(iatom_mod, jjz_index, idouble, iatom_div);
|
||||||
sumzu += static_cast<real>(0.5) * (utot.re * zloc.re + utot.im * zloc.im);
|
sumzu += static_cast<real_type>(0.5) * (utot.re * zloc.re + utot.im * zloc.im);
|
||||||
} // end if jeven
|
} // end if jeven
|
||||||
|
|
||||||
sumzu *= static_cast<real>(2.0);
|
sumzu *= static_cast<real_type>(2.0);
|
||||||
if (bzero_flag) {
|
if (bzero_flag) {
|
||||||
if (!wselfall_flag) {
|
if (!wselfall_flag) {
|
||||||
if (elem1 == elem2 && elem1 == elem3) {
|
if (elem1 == elem2 && elem1 == elem3) {
|
||||||
@ -781,12 +781,12 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi(const int& iatom_mod
|
|||||||
divergence. GPU version.
|
divergence. GPU version.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_yi(int iatom_mod, int jjz, int iatom_div,
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi(int iatom_mod, int jjz, int iatom_div,
|
||||||
const Kokkos::View<real***, Kokkos::LayoutLeft, DeviceType> &beta_pack)
|
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack)
|
||||||
{
|
{
|
||||||
real betaj;
|
real_type betaj;
|
||||||
|
|
||||||
const int j1 = idxz(jjz, 0);
|
const int j1 = idxz(jjz, 0);
|
||||||
const int j2 = idxz(jjz, 1);
|
const int j2 = idxz(jjz, 1);
|
||||||
@ -799,15 +799,15 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi(int iatom_mod, int j
|
|||||||
const int nb = idxz(jjz, 8);
|
const int nb = idxz(jjz, 8);
|
||||||
const int jju_half = idxz(jjz, 9);
|
const int jju_half = idxz(jjz, 9);
|
||||||
|
|
||||||
const real *cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
const real_type *cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||||
//int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2;
|
//int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2;
|
||||||
//int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2;
|
//int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2;
|
||||||
|
|
||||||
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
||||||
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
||||||
|
|
||||||
real ztmp_r = 0.0;
|
real_type ztmp_r = 0.0;
|
||||||
real ztmp_i = 0.0;
|
real_type ztmp_i = 0.0;
|
||||||
|
|
||||||
int jju1 = idxu_block[j1] + (j1 + 1) * mb1min;
|
int jju1 = idxu_block[j1] + (j1 + 1) * mb1min;
|
||||||
int jju2 = idxu_block[j2] + (j2 + 1) * mb2max;
|
int jju2 = idxu_block[j2] + (j2 + 1) * mb2max;
|
||||||
@ -888,9 +888,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi(int iatom_mod, int j
|
|||||||
and accumulation into dEidRj. GPU only.
|
and accumulation into dEidRj. GPU only.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
|
||||||
{
|
{
|
||||||
// get shared memory offset
|
// get shared memory offset
|
||||||
// scratch size: 32 atoms * (twojmax+1) cached values, no double buffer
|
// scratch size: 32 atoms * (twojmax+1) cached values, no double buffer
|
||||||
@ -900,8 +900,8 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
|||||||
const int scratch_shift = team_rank * tile_size;
|
const int scratch_shift = team_rank * tile_size;
|
||||||
|
|
||||||
// extract, wrap shared memory buffer
|
// extract, wrap shared memory buffer
|
||||||
WignerWrapper<real, vector_length> ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
|
WignerWrapper<real_type, vector_length> ulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
|
||||||
WignerWrapper<real, vector_length> dulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
|
WignerWrapper<real_type, vector_length> dulist_wrapper((complex*)team.team_shmem().get_shmem(team.team_size() * tile_size * sizeof(complex), 0) + scratch_shift, iatom_mod);
|
||||||
|
|
||||||
// load parameters
|
// load parameters
|
||||||
const auto a = a_pack(iatom_mod, jnbor, iatom_div);
|
const auto a = a_pack(iatom_mod, jnbor, iatom_div);
|
||||||
@ -913,7 +913,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
|||||||
|
|
||||||
const int jelem = element(iatom_mod + vector_length * iatom_div, jnbor);
|
const int jelem = element(iatom_mod + vector_length * iatom_div, jnbor);
|
||||||
|
|
||||||
auto dedr_full_sum = static_cast<real>(0.);
|
auto dedr_full_sum = static_cast<real_type>(0.);
|
||||||
|
|
||||||
// we need to "choose" when to bend
|
// we need to "choose" when to bend
|
||||||
// this for loop is here for context --- we expose additional
|
// this for loop is here for context --- we expose additional
|
||||||
@ -944,7 +944,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
|||||||
const complex dulist_prev = dulist_wrapper.get(ma);
|
const complex dulist_prev = dulist_wrapper.get(ma);
|
||||||
|
|
||||||
// ulist_accum += rootpq * a.conj() * ulist_prev;
|
// ulist_accum += rootpq * a.conj() * ulist_prev;
|
||||||
real rootpq = rootpqarray(j - ma, j - mb);
|
real_type rootpq = rootpqarray(j - ma, j - mb);
|
||||||
ulist_accum.re += rootpq * (a.re * ulist_prev.re + a.im * ulist_prev.im);
|
ulist_accum.re += rootpq * (a.re * ulist_prev.re + a.im * ulist_prev.im);
|
||||||
ulist_accum.im += rootpq * (a.re * ulist_prev.im - a.im * ulist_prev.re);
|
ulist_accum.im += rootpq * (a.re * ulist_prev.im - a.im * ulist_prev.re);
|
||||||
|
|
||||||
@ -996,7 +996,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
|||||||
const complex dulist_prev = dulist_wrapper.get(ma);
|
const complex dulist_prev = dulist_wrapper.get(ma);
|
||||||
|
|
||||||
// ulist_accum += rootpq * b * ulist_prev;
|
// ulist_accum += rootpq * b * ulist_prev;
|
||||||
real rootpq = rootpqarray(j - ma, mb);
|
real_type rootpq = rootpqarray(j - ma, mb);
|
||||||
ulist_accum.re += rootpq * (b.re * ulist_prev.re - b.im * ulist_prev.im);
|
ulist_accum.re += rootpq * (b.re * ulist_prev.re - b.im * ulist_prev.im);
|
||||||
ulist_accum.im += rootpq * (b.re * ulist_prev.im + b.im * ulist_prev.re);
|
ulist_accum.im += rootpq * (b.re * ulist_prev.im + b.im * ulist_prev.re);
|
||||||
|
|
||||||
@ -1037,8 +1037,8 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
|||||||
// grab y_local early
|
// grab y_local early
|
||||||
auto y_local = complex(ylist_pack_re(iatom_mod, jjup + ma, jelem, iatom_div), ylist_pack_im(iatom_mod, jjup+ma, jelem, iatom_div));
|
auto y_local = complex(ylist_pack_re(iatom_mod, jjup + ma, jelem, iatom_div), ylist_pack_im(iatom_mod, jjup+ma, jelem, iatom_div));
|
||||||
if (j % 2 == 1 && 2*(mb-1) == j-1) { // double check me...
|
if (j % 2 == 1 && 2*(mb-1) == j-1) { // double check me...
|
||||||
if (ma == (mb-1)) { y_local = static_cast<real>(0.5)*y_local; }
|
if (ma == (mb-1)) { y_local = static_cast<real_type>(0.5)*y_local; }
|
||||||
else if (ma > (mb-1)) { y_local.re = static_cast<real>(0.); y_local.im = static_cast<real>(0.); } // can probably avoid this outright
|
else if (ma > (mb-1)) { y_local.re = static_cast<real_type>(0.); y_local.im = static_cast<real_type>(0.); } // can probably avoid this outright
|
||||||
// else the ma < mb gets "double counted", cancelling the 0.5.
|
// else the ma < mb gets "double counted", cancelling the 0.5.
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1053,7 +1053,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
|||||||
//} // end reference loop over j_bend
|
//} // end reference loop over j_bend
|
||||||
|
|
||||||
// dedr gets zeroed out at the start of each iteration in compute_cayley_klein
|
// dedr gets zeroed out at the start of each iteration in compute_cayley_klein
|
||||||
Kokkos::atomic_add(&(dedr(iatom_mod + vector_length * iatom_div, jnbor, dir)), static_cast<real>(2.0) * dedr_full_sum);
|
Kokkos::atomic_add(&(dedr(iatom_mod + vector_length * iatom_div, jnbor, dir)), static_cast<real_type>(2.0) * dedr_full_sum);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1068,9 +1068,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_fused_deidrj(const type
|
|||||||
advantage of the symmetry of the Wigner U matrices.
|
advantage of the symmetry of the Wigner U matrices.
|
||||||
* ------------------------------------------------------------------------- */
|
* ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int& iatom, const int& ielem)
|
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int& iatom, const int& ielem)
|
||||||
{
|
{
|
||||||
for (int jelem = 0; jelem < nelements; jelem++) {
|
for (int jelem = 0; jelem < nelements; jelem++) {
|
||||||
for (int j = 0; j <= twojmax; j++) {
|
for (int j = 0; j <= twojmax; j++) {
|
||||||
@ -1085,7 +1085,7 @@ void SNAKokkos<DeviceType, real, vector_length>::pre_ui_cpu(const typename Kokko
|
|||||||
|
|
||||||
// if m is on the "diagonal", initialize it with the self energy.
|
// if m is on the "diagonal", initialize it with the self energy.
|
||||||
// Otherwise zero it out
|
// Otherwise zero it out
|
||||||
complex init(static_cast<real>(0.),static_cast<real>(0.));
|
complex init(static_cast<real_type>(0.),static_cast<real_type>(0.));
|
||||||
if (m % (j+2) == 0 && (!chem_flag || ielem == jelem || wselfall_flag)) { init.re = wself; } //need to map iatom to element
|
if (m % (j+2) == 0 && (!chem_flag || ielem == jelem || wselfall_flag)) { init.re = wself; } //need to map iatom to element
|
||||||
|
|
||||||
ulisttot(jjup, jelem, iatom) = init;
|
ulisttot(jjup, jelem, iatom) = init;
|
||||||
@ -1102,11 +1102,11 @@ void SNAKokkos<DeviceType, real, vector_length>::pre_ui_cpu(const typename Kokko
|
|||||||
data layout comments.
|
data layout comments.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||||
{
|
{
|
||||||
real rsq, r, x, y, z, z0, theta0;
|
real_type rsq, r, x, y, z, z0, theta0;
|
||||||
|
|
||||||
// utot(j,ma,mb) = 0 for all j,ma,mb
|
// utot(j,ma,mb) = 0 for all j,ma,mb
|
||||||
// utot(j,ma,ma) = 1 for all j,ma
|
// utot(j,ma,ma) = 1 for all j,ma
|
||||||
@ -1132,9 +1132,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_ui_cpu(const typename K
|
|||||||
compute Zi by summing over products of Ui, CPU version
|
compute Zi by summing over products of Ui, CPU version
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_zi_cpu(const int& iter)
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi_cpu(const int& iter)
|
||||||
{
|
{
|
||||||
const int iatom = iter / idxz_max;
|
const int iatom = iter / idxz_max;
|
||||||
const int jjz = iter % idxz_max;
|
const int jjz = iter % idxz_max;
|
||||||
@ -1149,22 +1149,22 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_zi_cpu(const int& iter)
|
|||||||
const int na = idxz(jjz, 7);
|
const int na = idxz(jjz, 7);
|
||||||
const int nb = idxz(jjz, 8);
|
const int nb = idxz(jjz, 8);
|
||||||
|
|
||||||
const real *cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
const real_type *cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||||
|
|
||||||
int idouble = 0;
|
int idouble = 0;
|
||||||
|
|
||||||
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
||||||
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
||||||
zlist(jjz, idouble, iatom).re = static_cast<real>(0.0);
|
zlist(jjz, idouble, iatom).re = static_cast<real_type>(0.0);
|
||||||
zlist(jjz, idouble, iatom).im = static_cast<real>(0.0);
|
zlist(jjz, idouble, iatom).im = static_cast<real_type>(0.0);
|
||||||
|
|
||||||
int jju1 = idxu_block[j1] + (j1+1)*mb1min;
|
int jju1 = idxu_block[j1] + (j1+1)*mb1min;
|
||||||
int jju2 = idxu_block[j2] + (j2+1)*mb2max;
|
int jju2 = idxu_block[j2] + (j2+1)*mb2max;
|
||||||
int icgb = mb1min*(j2+1) + mb2max;
|
int icgb = mb1min*(j2+1) + mb2max;
|
||||||
for(int ib = 0; ib < nb; ib++) {
|
for(int ib = 0; ib < nb; ib++) {
|
||||||
|
|
||||||
real suma1_r = static_cast<real>(0.0);
|
real_type suma1_r = static_cast<real_type>(0.0);
|
||||||
real suma1_i = static_cast<real>(0.0);
|
real_type suma1_i = static_cast<real_type>(0.0);
|
||||||
|
|
||||||
int ma1 = ma1min;
|
int ma1 = ma1min;
|
||||||
int ma2 = ma2max;
|
int ma2 = ma2max;
|
||||||
@ -1201,9 +1201,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_zi_cpu(const int& iter)
|
|||||||
compute Bi by summing conj(Ui)*Zi, CPU version
|
compute Bi by summing conj(Ui)*Zi, CPU version
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
|
||||||
{
|
{
|
||||||
// for j1 = 0,...,twojmax
|
// for j1 = 0,...,twojmax
|
||||||
// for j2 = 0,twojmax
|
// for j2 = 0,twojmax
|
||||||
@ -1229,11 +1229,11 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename K
|
|||||||
|
|
||||||
int jjz = idxz_block(j1, j2, j);
|
int jjz = idxz_block(j1, j2, j);
|
||||||
int jju = idxu_block[j];
|
int jju = idxu_block[j];
|
||||||
real sumzu = static_cast<real>(0.0);
|
real_type sumzu = static_cast<real_type>(0.0);
|
||||||
real sumzu_temp = static_cast<real>(0.0);
|
real_type sumzu_temp = static_cast<real_type>(0.0);
|
||||||
const int bound = (j+2)/2;
|
const int bound = (j+2)/2;
|
||||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,(j+1)*bound),
|
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,(j+1)*bound),
|
||||||
[&] (const int mbma, real& sum) {
|
[&] (const int mbma, real_type& sum) {
|
||||||
//for(int mb = 0; 2*mb < j; mb++)
|
//for(int mb = 0; 2*mb < j; mb++)
|
||||||
//for(int ma = 0; ma <= j; ma++) {
|
//for(int ma = 0; ma <= j; ma++) {
|
||||||
const int ma = mbma % (j + 1);
|
const int ma = mbma % (j + 1);
|
||||||
@ -1252,7 +1252,7 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename K
|
|||||||
if (j%2 == 0) {
|
if (j%2 == 0) {
|
||||||
const int mb = j/2;
|
const int mb = j/2;
|
||||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, mb),
|
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, mb),
|
||||||
[&] (const int ma, real& sum) {
|
[&] (const int ma, real_type& sum) {
|
||||||
//for(int ma = 0; ma < mb; ma++) {
|
//for(int ma = 0; ma < mb; ma++) {
|
||||||
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
|
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
|
||||||
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
||||||
@ -1265,13 +1265,13 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename K
|
|||||||
const int ma = mb;
|
const int ma = mb;
|
||||||
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
|
const int jju_index = jju+(mb-1)*(j+1)+(j+1)+ma;
|
||||||
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
const int jjz_index = jjz+(mb-1)*(j+1)+(j+1)+ma;
|
||||||
sumzu += static_cast<real>(0.5)*
|
sumzu += static_cast<real_type>(0.5)*
|
||||||
(ulisttot_full(jju_index, elem3, iatom).re * zlist(jjz_index, jalloy, iatom).re +
|
(ulisttot_full(jju_index, elem3, iatom).re * zlist(jjz_index, jalloy, iatom).re +
|
||||||
ulisttot_full(jju_index, elem3, iatom).im * zlist(jjz_index, jalloy, iatom).im);
|
ulisttot_full(jju_index, elem3, iatom).im * zlist(jjz_index, jalloy, iatom).im);
|
||||||
} // end if jeven
|
} // end if jeven
|
||||||
|
|
||||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||||
sumzu *= static_cast<real>(2.0);
|
sumzu *= static_cast<real_type>(2.0);
|
||||||
|
|
||||||
// apply bzero shift
|
// apply bzero shift
|
||||||
|
|
||||||
@ -1303,12 +1303,12 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_bi_cpu(const typename K
|
|||||||
CPU version
|
CPU version
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_yi_cpu(int iter,
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi_cpu(int iter,
|
||||||
const Kokkos::View<real**, DeviceType> &beta)
|
const Kokkos::View<real_type**, DeviceType> &beta)
|
||||||
{
|
{
|
||||||
real betaj;
|
real_type betaj;
|
||||||
const int iatom = iter / idxz_max;
|
const int iatom = iter / idxz_max;
|
||||||
const int jjz = iter % idxz_max;
|
const int jjz = iter % idxz_max;
|
||||||
|
|
||||||
@ -1323,15 +1323,15 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi_cpu(int iter,
|
|||||||
const int nb = idxz(jjz, 8);
|
const int nb = idxz(jjz, 8);
|
||||||
const int jju_half = idxz(jjz, 9);
|
const int jju_half = idxz(jjz, 9);
|
||||||
|
|
||||||
const real *cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
const real_type *cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||||
//int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2;
|
//int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2;
|
||||||
//int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2;
|
//int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2;
|
||||||
|
|
||||||
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
for (int elem1 = 0; elem1 < nelements; elem1++) {
|
||||||
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
for (int elem2 = 0; elem2 < nelements; elem2++) {
|
||||||
|
|
||||||
real ztmp_r = 0.0;
|
real_type ztmp_r = 0.0;
|
||||||
real ztmp_i = 0.0;
|
real_type ztmp_i = 0.0;
|
||||||
|
|
||||||
int jju1 = idxu_block[j1] + (j1 + 1) * mb1min;
|
int jju1 = idxu_block[j1] + (j1 + 1) * mb1min;
|
||||||
int jju2 = idxu_block[j2] + (j2 + 1) * mb2max;
|
int jju2 = idxu_block[j2] + (j2 + 1) * mb2max;
|
||||||
@ -1339,8 +1339,8 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi_cpu(int iter,
|
|||||||
|
|
||||||
for (int ib = 0; ib < nb; ib++) {
|
for (int ib = 0; ib < nb; ib++) {
|
||||||
|
|
||||||
real suma1_r = 0.0;
|
real_type suma1_r = 0.0;
|
||||||
real suma1_i = 0.0;
|
real_type suma1_i = 0.0;
|
||||||
|
|
||||||
int ma1 = ma1min;
|
int ma1 = ma1min;
|
||||||
int ma2 = ma2max;
|
int ma2 = ma2max;
|
||||||
@ -1411,19 +1411,19 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_yi_cpu(int iter,
|
|||||||
data layout
|
data layout
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||||
{
|
{
|
||||||
real rsq, r, x, y, z, z0, theta0, cs, sn;
|
real_type rsq, r, x, y, z, z0, theta0, cs, sn;
|
||||||
real dz0dr;
|
real_type dz0dr;
|
||||||
|
|
||||||
x = rij(iatom,jnbor,0);
|
x = rij(iatom,jnbor,0);
|
||||||
y = rij(iatom,jnbor,1);
|
y = rij(iatom,jnbor,1);
|
||||||
z = rij(iatom,jnbor,2);
|
z = rij(iatom,jnbor,2);
|
||||||
rsq = x * x + y * y + z * z;
|
rsq = x * x + y * y + z * z;
|
||||||
r = sqrt(rsq);
|
r = sqrt(rsq);
|
||||||
auto rscale0 = rfac0 * static_cast<real>(MY_PI) / (rcutij(iatom,jnbor) - rmin0);
|
auto rscale0 = rfac0 * static_cast<real_type>(MY_PI) / (rcutij(iatom,jnbor) - rmin0);
|
||||||
theta0 = (r - rmin0) * rscale0;
|
theta0 = (r - rmin0) * rscale0;
|
||||||
sincos_wrapper(theta0, &sn, &cs);
|
sincos_wrapper(theta0, &sn, &cs);
|
||||||
z0 = r * cs / sn;
|
z0 = r * cs / sn;
|
||||||
@ -1442,16 +1442,16 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_duidrj_cpu(const typena
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||||
{
|
{
|
||||||
t_scalar3<real> final_sum;
|
t_scalar3<real_type> final_sum;
|
||||||
const int jelem = element(iatom, jnbor);
|
const int jelem = element(iatom, jnbor);
|
||||||
|
|
||||||
//for(int j = 0; j <= twojmax; j++) {
|
//for(int j = 0; j <= twojmax; j++) {
|
||||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,twojmax+1),
|
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,twojmax+1),
|
||||||
[&] (const int& j, t_scalar3<real>& sum_tmp) {
|
[&] (const int& j, t_scalar3<real_type>& sum_tmp) {
|
||||||
int jju_half = idxu_half_block[j];
|
int jju_half = idxu_half_block[j];
|
||||||
int jju_cache = idxu_cache_block[j];
|
int jju_cache = idxu_cache_block[j];
|
||||||
|
|
||||||
@ -1509,10 +1509,10 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_deidrj_cpu(const typena
|
|||||||
of the symmetry of the Wigner U matrices.
|
of the symmetry of the Wigner U matrices.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
void SNAKokkos<DeviceType, real_type, vector_length>::add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||||
const real& r, const real& wj, const real& rcut, int jelem)
|
const real_type& r, const real_type& wj, const real_type& rcut, int jelem)
|
||||||
{
|
{
|
||||||
const auto sfac = compute_sfac(r, rcut) * wj;
|
const auto sfac = compute_sfac(r, rcut) * wj;
|
||||||
|
|
||||||
@ -1539,18 +1539,18 @@ void SNAKokkos<DeviceType, real, vector_length>::add_uarraytot(const typename Ko
|
|||||||
information stored between layers via scratch memory on the GPU path
|
information stored between layers via scratch memory on the GPU path
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||||
const real& x, const real& y, const real& z, const real& z0, const real& r)
|
const real_type& x, const real_type& y, const real_type& z, const real_type& z0, const real_type& r)
|
||||||
{
|
{
|
||||||
real r0inv;
|
real_type r0inv;
|
||||||
real a_r, b_r, a_i, b_i;
|
real_type a_r, b_r, a_i, b_i;
|
||||||
real rootpq;
|
real_type rootpq;
|
||||||
|
|
||||||
// compute Cayley-Klein parameters for unit quaternion
|
// compute Cayley-Klein parameters for unit quaternion
|
||||||
|
|
||||||
r0inv = static_cast<real>(1.0) / sqrt(r * r + z0 * z0);
|
r0inv = static_cast<real_type>(1.0) / sqrt(r * r + z0 * z0);
|
||||||
a_r = r0inv * z0;
|
a_r = r0inv * z0;
|
||||||
a_i = -r0inv * z;
|
a_i = -r0inv * z;
|
||||||
b_r = r0inv * y;
|
b_r = r0inv * y;
|
||||||
@ -1630,23 +1630,23 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_uarray_cpu(const typena
|
|||||||
Uses same cached data layout of ulist
|
Uses same cached data layout of ulist
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||||
const real& x, const real& y, const real& z,
|
const real_type& x, const real_type& y, const real_type& z,
|
||||||
const real& z0, const real& r, const real& dz0dr,
|
const real_type& z0, const real_type& r, const real_type& dz0dr,
|
||||||
const real& wj, const real& rcut)
|
const real_type& wj, const real_type& rcut)
|
||||||
{
|
{
|
||||||
real r0inv;
|
real_type r0inv;
|
||||||
real a_r, a_i, b_r, b_i;
|
real_type a_r, a_i, b_r, b_i;
|
||||||
real da_r[3], da_i[3], db_r[3], db_i[3];
|
real_type da_r[3], da_i[3], db_r[3], db_i[3];
|
||||||
real dz0[3], dr0inv[3], dr0invdr;
|
real_type dz0[3], dr0inv[3], dr0invdr;
|
||||||
real rootpq;
|
real_type rootpq;
|
||||||
|
|
||||||
real rinv = 1.0 / r;
|
real_type rinv = 1.0 / r;
|
||||||
real ux = x * rinv;
|
real_type ux = x * rinv;
|
||||||
real uy = y * rinv;
|
real_type uy = y * rinv;
|
||||||
real uz = z * rinv;
|
real_type uz = z * rinv;
|
||||||
|
|
||||||
r0inv = 1.0 / sqrt(r * r + z0 * z0);
|
r0inv = 1.0 / sqrt(r * r + z0 * z0);
|
||||||
a_r = z0 * r0inv;
|
a_r = z0 * r0inv;
|
||||||
@ -1761,8 +1761,8 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_duarray_cpu(const typen
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
real sfac = compute_sfac(r, rcut);
|
real_type sfac = compute_sfac(r, rcut);
|
||||||
real dsfac = compute_dsfac(r, rcut);
|
real_type dsfac = compute_dsfac(r, rcut);
|
||||||
|
|
||||||
sfac *= wj;
|
sfac *= wj;
|
||||||
dsfac *= wj;
|
dsfac *= wj;
|
||||||
@ -1796,9 +1796,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_duarray_cpu(const typen
|
|||||||
factorial n, wrapper for precomputed table
|
factorial n, wrapper for precomputed table
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
inline
|
inline
|
||||||
double SNAKokkos<DeviceType, real, vector_length>::factorial(int n)
|
double SNAKokkos<DeviceType, real_type, vector_length>::factorial(int n)
|
||||||
{
|
{
|
||||||
//if (n < 0 || n > nmaxfactorial) {
|
//if (n < 0 || n > nmaxfactorial) {
|
||||||
// char str[128];
|
// char str[128];
|
||||||
@ -1813,8 +1813,8 @@ double SNAKokkos<DeviceType, real, vector_length>::factorial(int n)
|
|||||||
factorial n table, size SNA::nmaxfactorial+1
|
factorial n table, size SNA::nmaxfactorial+1
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
const double SNAKokkos<DeviceType, real, vector_length>::nfac_table[] = {
|
const double SNAKokkos<DeviceType, real_type, vector_length>::nfac_table[] = {
|
||||||
1,
|
1,
|
||||||
1,
|
1,
|
||||||
2,
|
2,
|
||||||
@ -1989,9 +1989,9 @@ const double SNAKokkos<DeviceType, real, vector_length>::nfac_table[] = {
|
|||||||
the function delta given by VMK Eq. 8.2(1)
|
the function delta given by VMK Eq. 8.2(1)
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
inline
|
inline
|
||||||
double SNAKokkos<DeviceType, real, vector_length>::deltacg(int j1, int j2, int j)
|
double SNAKokkos<DeviceType, real_type, vector_length>::deltacg(int j1, int j2, int j)
|
||||||
{
|
{
|
||||||
double sfaccg = factorial((j1 + j2 + j) / 2 + 1);
|
double sfaccg = factorial((j1 + j2 + j) / 2 + 1);
|
||||||
return sqrt(factorial((j1 + j2 - j) / 2) *
|
return sqrt(factorial((j1 + j2 - j) / 2) *
|
||||||
@ -2004,9 +2004,9 @@ double SNAKokkos<DeviceType, real, vector_length>::deltacg(int j1, int j2, int j
|
|||||||
the quasi-binomial formula VMK 8.2.1(3)
|
the quasi-binomial formula VMK 8.2.1(3)
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
inline
|
inline
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::init_clebsch_gordan()
|
void SNAKokkos<DeviceType, real_type, vector_length>::init_clebsch_gordan()
|
||||||
{
|
{
|
||||||
auto h_cglist = Kokkos::create_mirror_view(cglist);
|
auto h_cglist = Kokkos::create_mirror_view(cglist);
|
||||||
|
|
||||||
@ -2074,23 +2074,23 @@ void SNAKokkos<DeviceType, real, vector_length>::init_clebsch_gordan()
|
|||||||
the p = 0, q = 0 entries are allocated and skipped for convenience.
|
the p = 0, q = 0 entries are allocated and skipped for convenience.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
inline
|
inline
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::init_rootpqarray()
|
void SNAKokkos<DeviceType, real_type, vector_length>::init_rootpqarray()
|
||||||
{
|
{
|
||||||
auto h_rootpqarray = Kokkos::create_mirror_view(rootpqarray);
|
auto h_rootpqarray = Kokkos::create_mirror_view(rootpqarray);
|
||||||
for (int p = 1; p <= twojmax; p++)
|
for (int p = 1; p <= twojmax; p++)
|
||||||
for (int q = 1; q <= twojmax; q++)
|
for (int q = 1; q <= twojmax; q++)
|
||||||
h_rootpqarray(p,q) = static_cast<real>(sqrt(static_cast<double>(p)/q));
|
h_rootpqarray(p,q) = static_cast<real_type>(sqrt(static_cast<double>(p)/q));
|
||||||
Kokkos::deep_copy(rootpqarray,h_rootpqarray);
|
Kokkos::deep_copy(rootpqarray,h_rootpqarray);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
inline
|
inline
|
||||||
int SNAKokkos<DeviceType, real, vector_length>::compute_ncoeff()
|
int SNAKokkos<DeviceType, real_type, vector_length>::compute_ncoeff()
|
||||||
{
|
{
|
||||||
int ncount;
|
int ncount;
|
||||||
|
|
||||||
@ -2111,19 +2111,19 @@ int SNAKokkos<DeviceType, real, vector_length>::compute_ncoeff()
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
real SNAKokkos<DeviceType, real, vector_length>::compute_sfac(real r, real rcut)
|
real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_sfac(real_type r, real_type rcut)
|
||||||
{
|
{
|
||||||
constexpr real one = static_cast<real>(1.0);
|
constexpr real_type one = static_cast<real_type>(1.0);
|
||||||
constexpr real zero = static_cast<real>(0.0);
|
constexpr real_type zero = static_cast<real_type>(0.0);
|
||||||
constexpr real onehalf = static_cast<real>(0.5);
|
constexpr real_type onehalf = static_cast<real_type>(0.5);
|
||||||
if (switch_flag == 0) return one;
|
if (switch_flag == 0) return one;
|
||||||
if (switch_flag == 1) {
|
if (switch_flag == 1) {
|
||||||
if(r <= rmin0) return one;
|
if(r <= rmin0) return one;
|
||||||
else if(r > rcut) return zero;
|
else if(r > rcut) return zero;
|
||||||
else {
|
else {
|
||||||
auto rcutfac = static_cast<real>(MY_PI) / (rcut - rmin0);
|
auto rcutfac = static_cast<real_type>(MY_PI) / (rcut - rmin0);
|
||||||
return onehalf * (cos((r - rmin0) * rcutfac) + one);
|
return onehalf * (cos((r - rmin0) * rcutfac) + one);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2132,37 +2132,37 @@ real SNAKokkos<DeviceType, real, vector_length>::compute_sfac(real r, real rcut)
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
real SNAKokkos<DeviceType, real, vector_length>::compute_dsfac(real r, real rcut)
|
real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_dsfac(real_type r, real_type rcut)
|
||||||
{
|
{
|
||||||
constexpr real zero = static_cast<real>(0.0);
|
constexpr real_type zero = static_cast<real_type>(0.0);
|
||||||
constexpr real onehalf = static_cast<real>(0.5);
|
constexpr real_type onehalf = static_cast<real_type>(0.5);
|
||||||
if (switch_flag == 0) return zero;
|
if (switch_flag == 0) return zero;
|
||||||
if (switch_flag == 1) {
|
if (switch_flag == 1) {
|
||||||
if(r <= rmin0) return zero;
|
if(r <= rmin0) return zero;
|
||||||
else if(r > rcut) return zero;
|
else if(r > rcut) return zero;
|
||||||
else {
|
else {
|
||||||
auto rcutfac = static_cast<real>(MY_PI) / (rcut - rmin0);
|
auto rcutfac = static_cast<real_type>(MY_PI) / (rcut - rmin0);
|
||||||
return -onehalf * sin((r - rmin0) * rcutfac) * rcutfac;
|
return -onehalf * sin((r - rmin0) * rcutfac) * rcutfac;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return zero;
|
return zero;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::compute_s_dsfac(const real r, const real rcut, real& sfac, real& dsfac) {
|
void SNAKokkos<DeviceType, real_type, vector_length>::compute_s_dsfac(const real_type r, const real_type rcut, real_type& sfac, real_type& dsfac) {
|
||||||
constexpr real one = static_cast<real>(1.0);
|
constexpr real_type one = static_cast<real_type>(1.0);
|
||||||
constexpr real zero = static_cast<real>(0.0);
|
constexpr real_type zero = static_cast<real_type>(0.0);
|
||||||
constexpr real onehalf = static_cast<real>(0.5);
|
constexpr real_type onehalf = static_cast<real_type>(0.5);
|
||||||
if (switch_flag == 0) { sfac = zero; dsfac = zero; }
|
if (switch_flag == 0) { sfac = zero; dsfac = zero; }
|
||||||
else if (switch_flag == 1) {
|
else if (switch_flag == 1) {
|
||||||
if (r <= rmin0) { sfac = one; dsfac = zero; }
|
if (r <= rmin0) { sfac = one; dsfac = zero; }
|
||||||
else if (r > rcut) { sfac = zero; dsfac = zero; }
|
else if (r > rcut) { sfac = zero; dsfac = zero; }
|
||||||
else {
|
else {
|
||||||
const auto rcutfac = static_cast<real>(MY_PI) / (rcut - rmin0);
|
const auto rcutfac = static_cast<real_type>(MY_PI) / (rcut - rmin0);
|
||||||
real sn, cs;
|
real_type sn, cs;
|
||||||
sincos_wrapper((r - rmin0) * rcutfac, &sn, &cs); // need to create a wrapper
|
sincos_wrapper((r - rmin0) * rcutfac, &sn, &cs); // need to create a wrapper
|
||||||
sfac = onehalf * (cs + one);
|
sfac = onehalf * (cs + one);
|
||||||
dsfac = -onehalf * sn * rcutfac;
|
dsfac = -onehalf * sn * rcutfac;
|
||||||
@ -2174,9 +2174,9 @@ void SNAKokkos<DeviceType, real, vector_length>::compute_s_dsfac(const real r, c
|
|||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
// set direction of batched Duidrj
|
// set direction of batched Duidrj
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
KOKKOS_FORCEINLINE_FUNCTION
|
||||||
void SNAKokkos<DeviceType, real, vector_length>::set_dir(int dir_) {
|
void SNAKokkos<DeviceType, real_type, vector_length>::set_dir(int dir_) {
|
||||||
dir = dir_;
|
dir = dir_;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2184,8 +2184,8 @@ void SNAKokkos<DeviceType, real, vector_length>::set_dir(int dir_) {
|
|||||||
memory usage of arrays
|
memory usage of arrays
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType, typename real, int vector_length>
|
template<class DeviceType, typename real_type, int vector_length>
|
||||||
double SNAKokkos<DeviceType, real, vector_length>::memory_usage()
|
double SNAKokkos<DeviceType, real_type, vector_length>::memory_usage()
|
||||||
{
|
{
|
||||||
int jdimpq = twojmax + 2;
|
int jdimpq = twojmax + 2;
|
||||||
int jdim = twojmax + 1;
|
int jdim = twojmax + 1;
|
||||||
@ -2193,48 +2193,48 @@ double SNAKokkos<DeviceType, real, vector_length>::memory_usage()
|
|||||||
|
|
||||||
bytes = 0;
|
bytes = 0;
|
||||||
|
|
||||||
bytes += jdimpq*jdimpq * sizeof(real); // pqarray
|
bytes += jdimpq*jdimpq * sizeof(real_type); // pqarray
|
||||||
bytes += idxcg_max * sizeof(real); // cglist
|
bytes += idxcg_max * sizeof(real_type); // cglist
|
||||||
|
|
||||||
#ifdef LMP_KOKKOS_GPU
|
#ifdef LMP_KOKKOS_GPU
|
||||||
if (!host_flag) {
|
if (!host_flag) {
|
||||||
|
|
||||||
auto natom_pad = (natom+vector_length-1)/vector_length;
|
auto natom_pad = (natom+vector_length-1)/vector_length;
|
||||||
|
|
||||||
bytes += natom_pad * nmax * sizeof(real) * 2; // a_pack
|
bytes += natom_pad * nmax * sizeof(real_type) * 2; // a_pack
|
||||||
bytes += natom_pad * nmax * sizeof(real) * 2; // b_pack
|
bytes += natom_pad * nmax * sizeof(real_type) * 2; // b_pack
|
||||||
bytes += natom_pad * nmax * 3 * sizeof(real) * 2; // da_pack
|
bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // da_pack
|
||||||
bytes += natom_pad * nmax * 3 * sizeof(real) * 2; // db_pack
|
bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // db_pack
|
||||||
bytes += natom_pad * nmax * 4 * sizeof(real); // sfac_pack
|
bytes += natom_pad * nmax * 4 * sizeof(real_type); // sfac_pack
|
||||||
|
|
||||||
|
|
||||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ulisttot_re_pack
|
bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_re_pack
|
||||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ulisttot_im_pack
|
bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_im_pack
|
||||||
bytes += natom_pad * idxu_max * nelements * sizeof(real) * 2; // ulisttot_pack
|
bytes += natom_pad * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_pack
|
||||||
|
|
||||||
bytes += natom_pad * idxz_max * ndoubles * sizeof(real) * 2; // zlist_pack
|
bytes += natom_pad * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist_pack
|
||||||
bytes += natom_pad * idxb_max * ntriples * sizeof(real); // blist_pack
|
bytes += natom_pad * idxb_max * ntriples * sizeof(real_type); // blist_pack
|
||||||
|
|
||||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ylist_pack_re
|
bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_re
|
||||||
bytes += natom_pad * idxu_half_max * nelements * sizeof(real); // ylist_pack_im
|
bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_im
|
||||||
} else {
|
} else {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bytes += natom * nmax * idxu_cache_max * sizeof(real) * 2; // ulist
|
bytes += natom * nmax * idxu_cache_max * sizeof(real_type) * 2; // ulist
|
||||||
bytes += natom * idxu_half_max * nelements * sizeof(real) * 2; // ulisttot
|
bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ulisttot
|
||||||
bytes += natom * idxu_max * nelements * sizeof(real) * 2; // ulisttot_full
|
bytes += natom * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_full
|
||||||
|
|
||||||
bytes += natom * idxz_max * ndoubles * sizeof(real) * 2; // zlist
|
bytes += natom * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist
|
||||||
bytes += natom * idxb_max * ntriples * sizeof(real); // blist
|
bytes += natom * idxb_max * ntriples * sizeof(real_type); // blist
|
||||||
|
|
||||||
bytes += natom * idxu_half_max * nelements * sizeof(real) * 2; // ylist
|
bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ylist
|
||||||
|
|
||||||
bytes += natom * nmax * idxu_cache_max * 3 * sizeof(real) * 2; // dulist
|
bytes += natom * nmax * idxu_cache_max * 3 * sizeof(real_type) * 2; // dulist
|
||||||
#ifdef LMP_KOKKOS_GPU
|
#ifdef LMP_KOKKOS_GPU
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bytes += natom * nmax * 3 * sizeof(real); // dedr
|
bytes += natom * nmax * 3 * sizeof(real_type); // dedr
|
||||||
|
|
||||||
bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block
|
bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block
|
||||||
bytes += jdim * sizeof(int); // idxu_block
|
bytes += jdim * sizeof(int); // idxu_block
|
||||||
@ -2247,12 +2247,12 @@ double SNAKokkos<DeviceType, real, vector_length>::memory_usage()
|
|||||||
bytes += idxz_max * 10 * sizeof(int); // idxz
|
bytes += idxz_max * 10 * sizeof(int); // idxz
|
||||||
bytes += idxb_max * 3 * sizeof(int); // idxb
|
bytes += idxb_max * 3 * sizeof(int); // idxb
|
||||||
|
|
||||||
bytes += jdim * sizeof(real); // bzero
|
bytes += jdim * sizeof(real_type); // bzero
|
||||||
|
|
||||||
bytes += natom * nmax * 3 * sizeof(real); // rij
|
bytes += natom * nmax * 3 * sizeof(real_type); // rij
|
||||||
bytes += natom * nmax * sizeof(real); // inside
|
bytes += natom * nmax * sizeof(real_type); // inside
|
||||||
bytes += natom * nmax * sizeof(real); // wj
|
bytes += natom * nmax * sizeof(real_type); // wj
|
||||||
bytes += natom * nmax * sizeof(real); // rcutij
|
bytes += natom * nmax * sizeof(real_type); // rcutij
|
||||||
|
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user