Merge branch 'master' into lammps_gjf
This commit is contained in:
@ -39,6 +39,7 @@ struct TagPairSNAPPreUi{};
|
||||
struct TagPairSNAPComputeUi{};
|
||||
struct TagPairSNAPComputeZi{};
|
||||
struct TagPairSNAPComputeBi{};
|
||||
struct TagPairSNAPZeroYi{};
|
||||
struct TagPairSNAPComputeYi{};
|
||||
struct TagPairSNAPComputeDuidrj{};
|
||||
struct TagPairSNAPComputeDeidrj{};
|
||||
@ -73,19 +74,22 @@ public:
|
||||
void operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi>::member_type& team) const;
|
||||
void operator() (TagPairSNAPPreUi,const int& ii) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeZi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeZi>::member_type& team) const;
|
||||
void operator() (TagPairSNAPComputeZi,const int& ii) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeBi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBi>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeYi>::member_type& team) const;
|
||||
void operator() (TagPairSNAPZeroYi,const int& ii) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYi,const int& ii) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeDuidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDuidrj>::member_type& team) const;
|
||||
|
||||
@ -184,22 +184,12 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
Kokkos::parallel_reduce("PairSNAPKokkos::find_max_neighs",inum, FindMaxNumNeighs<DeviceType>(k_list), Kokkos::Experimental::Max<int>(max_neighs));
|
||||
|
||||
int vector_length = 1;
|
||||
int ui_vector_length = 1;
|
||||
int team_size = 1;
|
||||
int yi_team_size = 1;
|
||||
int team_size_max = Kokkos::TeamPolicy<DeviceType>::team_size_max(*this);
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
team_size = 32;//max_neighs;
|
||||
if (team_size*vector_length > team_size_max)
|
||||
team_size = team_size_max/vector_length;
|
||||
|
||||
yi_team_size = 256;
|
||||
if (yi_team_size*vector_length > team_size_max)
|
||||
yi_team_size = team_size_max/vector_length;
|
||||
|
||||
ui_vector_length = 8;
|
||||
if (team_size*ui_vector_length > team_size_max)
|
||||
team_size = team_size_max/ui_vector_length;
|
||||
#endif
|
||||
|
||||
if (beta_max < inum) {
|
||||
@ -227,17 +217,21 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
Kokkos::parallel_for("ComputeNeigh",policy_neigh,*this);
|
||||
|
||||
//PreUi
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi> policy_preui(chunk_size,team_size,vector_length);
|
||||
typename Kokkos::RangePolicy<DeviceType, TagPairSNAPPreUi> policy_preui(0,chunk_size);
|
||||
Kokkos::parallel_for("PreUi",policy_preui,*this);
|
||||
|
||||
//ComputeUi
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi> policy_ui(((inum+team_size-1)/team_size)*max_neighs,team_size,ui_vector_length);
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi> policy_ui(((inum+team_size-1)/team_size)*max_neighs,team_size,vector_length);
|
||||
Kokkos::parallel_for("ComputeUi",policy_ui,*this);
|
||||
|
||||
//Ulisttot transpose
|
||||
snaKK.transpose_ulisttot();
|
||||
|
||||
//Compute bispectrum
|
||||
if (quadraticflag || eflag) {
|
||||
//ComputeZi
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeZi> policy_zi(chunk_size,team_size,vector_length);
|
||||
int idxz_max = snaKK.idxz_max;
|
||||
typename Kokkos::RangePolicy<DeviceType, TagPairSNAPComputeZi> policy_zi(0,chunk_size*idxz_max);
|
||||
Kokkos::parallel_for("ComputeZi",policy_zi,*this);
|
||||
|
||||
//ComputeBi
|
||||
@ -250,7 +244,12 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
Kokkos::parallel_for("ComputeBeta",policy_beta,*this);
|
||||
|
||||
//ComputeYi
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeYi> policy_yi(chunk_size,yi_team_size,vector_length);
|
||||
typename Kokkos::RangePolicy<DeviceType, TagPairSNAPZeroYi> policy_zero_yi(0,chunk_size);
|
||||
Kokkos::parallel_for("ZeroYi",policy_zero_yi,*this);
|
||||
|
||||
//ComputeYi
|
||||
int idxz_max = snaKK.idxz_max;
|
||||
typename Kokkos::RangePolicy<DeviceType, TagPairSNAPComputeYi> policy_yi(0,chunk_size*idxz_max);
|
||||
Kokkos::parallel_for("ComputeYi",policy_yi,*this);
|
||||
|
||||
//ComputeDuidrj
|
||||
@ -504,10 +503,9 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeNeigh,const typen
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi>::member_type& team) const {
|
||||
int ii = team.league_rank();
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPPreUi,const int& ii) const {
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
my_sna.pre_ui(team,ii);
|
||||
my_sna.pre_ui(ii);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
@ -529,18 +527,23 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeUi,const typename
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeYi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeYi>::member_type& team) const {
|
||||
int ii = team.league_rank();
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPZeroYi,const int& ii) const {
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
my_sna.compute_yi(team,ii,d_beta);
|
||||
my_sna.zero_yi(ii);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeZi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeZi>::member_type& team) const {
|
||||
int ii = team.league_rank();
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeYi,const int& ii) const {
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
my_sna.compute_zi(team,ii);
|
||||
my_sna.compute_yi(ii,d_beta);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeZi,const int& ii) const {
|
||||
SNAKokkos<DeviceType> my_sna = snaKK;
|
||||
my_sna.compute_zi(ii);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
|
||||
@ -26,15 +26,28 @@
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
typedef double SNAreal;
|
||||
typedef struct { SNAreal re, im; } SNAcomplex;
|
||||
|
||||
struct SNAKK_ZINDICES {
|
||||
int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju;
|
||||
//typedef struct { SNAreal re, im; } SNAcomplex;
|
||||
struct alignas(2*sizeof(SNAreal)) SNAcomplex{
|
||||
SNAreal re, im;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SNAcomplex() : re(0),im(0)
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SNAcomplex(SNAreal real_in, SNAreal imag_in)
|
||||
:re(real_in),im(imag_in)
|
||||
{}
|
||||
};
|
||||
|
||||
struct SNAKK_BINDICES {
|
||||
int j1, j2, j;
|
||||
};
|
||||
//struct SNAKK_ZINDICES {
|
||||
// int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju;
|
||||
//};
|
||||
//
|
||||
//struct SNAKK_BINDICES {
|
||||
// int j1, j2, j;
|
||||
//};
|
||||
|
||||
template<class DeviceType>
|
||||
class SNAKokkos {
|
||||
@ -53,12 +66,32 @@ public:
|
||||
typedef Kokkos::View<SNAcomplex*, DeviceType> t_sna_1c;
|
||||
typedef Kokkos::View<SNAcomplex*, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1c_atomic;
|
||||
typedef Kokkos::View<SNAcomplex**, DeviceType> t_sna_2c;
|
||||
typedef Kokkos::View<SNAcomplex**, Kokkos::LayoutRight, DeviceType> t_sna_2c_cpu;
|
||||
typedef Kokkos::View<SNAcomplex**, Kokkos::LayoutRight, DeviceType> t_sna_2c_lr;
|
||||
typedef Kokkos::View<SNAcomplex***, DeviceType> t_sna_3c;
|
||||
typedef Kokkos::View<SNAcomplex***[3], DeviceType> t_sna_4c;
|
||||
typedef Kokkos::View<SNAcomplex**[3], DeviceType> t_sna_3c3;
|
||||
typedef Kokkos::View<SNAcomplex*****, DeviceType> t_sna_5c;
|
||||
|
||||
// Helper class to get ulisttot_r
|
||||
|
||||
template<typename DeviceLayout, typename T1, typename T2>
|
||||
class UlisttotHelper {
|
||||
public:
|
||||
inline
|
||||
static void transpose(T1 &ulisttot_lr, const T2 &ulisttot) {
|
||||
Kokkos::deep_copy(ulisttot_lr,ulisttot);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T1, typename T2>
|
||||
class UlisttotHelper<Kokkos::LayoutRight,T1,T2> {
|
||||
public:
|
||||
inline
|
||||
static void transpose(T1 &ulisttot_lr, const T2 &ulisttot) {
|
||||
ulisttot_lr = ulisttot;
|
||||
}
|
||||
};
|
||||
|
||||
inline
|
||||
SNAKokkos() {};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -80,17 +113,22 @@ inline
|
||||
|
||||
int ncoeff;
|
||||
|
||||
inline
|
||||
void transpose_ulisttot();
|
||||
|
||||
// functions for bispectrum coefficients
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void pre_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||
void pre_ui(const int&); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_ui_orig(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_zi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||
void compute_zi(const int&); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int,
|
||||
void zero_yi(const int&);
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi(int,
|
||||
const Kokkos::View<F_FLOAT**, DeviceType> &beta); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||
@ -129,23 +167,25 @@ inline
|
||||
int twojmax, diagonalstyle;
|
||||
|
||||
t_sna_2d blist;
|
||||
t_sna_2c_cpu ulisttot;
|
||||
t_sna_2c ulisttot;
|
||||
t_sna_2c_lr ulisttot_lr;
|
||||
t_sna_2c zlist;
|
||||
|
||||
t_sna_3c ulist;
|
||||
t_sna_2c ylist;
|
||||
t_sna_2c_lr ylist;
|
||||
|
||||
// derivatives of data
|
||||
t_sna_4c dulist;
|
||||
|
||||
int idxcg_max, idxu_max, idxz_max, idxb_max;
|
||||
|
||||
private:
|
||||
double rmin0, rfac0;
|
||||
|
||||
//use indexlist instead of loops, constructor generates these
|
||||
// Same across all SNAKokkos
|
||||
Kokkos::View<SNAKK_ZINDICES*, DeviceType> idxz;
|
||||
Kokkos::View<SNAKK_BINDICES*, DeviceType> idxb;
|
||||
int idxcg_max, idxu_max, idxz_max, idxb_max;
|
||||
Kokkos::View<int*[10], DeviceType> idxz;
|
||||
Kokkos::View<int*[3], DeviceType> idxb;
|
||||
Kokkos::View<int***, DeviceType> idxcg_block;
|
||||
Kokkos::View<int*, DeviceType> idxu_block;
|
||||
Kokkos::View<int***, DeviceType> idxz_block;
|
||||
@ -173,9 +213,9 @@ inline
|
||||
inline
|
||||
void init_rootpqarray(); // init()
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void zero_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // compute_ui
|
||||
void zero_uarraytot(const int&); // compute_ui
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void addself_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, double); // compute_ui
|
||||
void addself_uarraytot(const int&, const double&); // compute_ui
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, double, double, double); // compute_ui
|
||||
|
||||
|
||||
@ -117,7 +117,7 @@ void SNAKokkos<DeviceType>::build_indexlist()
|
||||
if (j >= j1) idxb_count++;
|
||||
|
||||
idxb_max = idxb_count;
|
||||
idxb = Kokkos::View<SNAKK_BINDICES*, DeviceType>("SNAKokkos::idxb",idxb_max);
|
||||
idxb = Kokkos::View<int*[3], DeviceType>("SNAKokkos::idxb",idxb_max);
|
||||
auto h_idxb = Kokkos::create_mirror_view(idxb);
|
||||
|
||||
idxb_count = 0;
|
||||
@ -125,9 +125,9 @@ void SNAKokkos<DeviceType>::build_indexlist()
|
||||
for(int j2 = 0; j2 <= j1; j2++)
|
||||
for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
|
||||
if (j >= j1) {
|
||||
h_idxb[idxb_count].j1 = j1;
|
||||
h_idxb[idxb_count].j2 = j2;
|
||||
h_idxb[idxb_count].j = j;
|
||||
h_idxb(idxb_count,0) = j1;
|
||||
h_idxb(idxb_count,1) = j2;
|
||||
h_idxb(idxb_count,2) = j;
|
||||
idxb_count++;
|
||||
}
|
||||
Kokkos::deep_copy(idxb,h_idxb);
|
||||
@ -160,7 +160,7 @@ void SNAKokkos<DeviceType>::build_indexlist()
|
||||
idxz_count++;
|
||||
|
||||
idxz_max = idxz_count;
|
||||
idxz = Kokkos::View<SNAKK_ZINDICES*, DeviceType>("SNAKokkos::idxz",idxz_max);
|
||||
idxz = Kokkos::View<int*[10], DeviceType>("SNAKokkos::idxz",idxz_max);
|
||||
auto h_idxz = Kokkos::create_mirror_view(idxz);
|
||||
|
||||
idxz_block = Kokkos::View<int***, DeviceType>("SNAKokkos::idxz_block", jdim,jdim,jdim);
|
||||
@ -178,20 +178,20 @@ void SNAKokkos<DeviceType>::build_indexlist()
|
||||
|
||||
for (int mb = 0; 2*mb <= j; mb++)
|
||||
for (int ma = 0; ma <= j; ma++) {
|
||||
h_idxz[idxz_count].j1 = j1;
|
||||
h_idxz[idxz_count].j2 = j2;
|
||||
h_idxz[idxz_count].j = j;
|
||||
h_idxz[idxz_count].ma1min = MAX(0, (2 * ma - j - j2 + j1) / 2);
|
||||
h_idxz[idxz_count].ma2max = (2 * ma - j - (2 * h_idxz[idxz_count].ma1min - j1) + j2) / 2;
|
||||
h_idxz[idxz_count].na = MIN(j1, (2 * ma - j + j2 + j1) / 2) - h_idxz[idxz_count].ma1min + 1;
|
||||
h_idxz[idxz_count].mb1min = MAX(0, (2 * mb - j - j2 + j1) / 2);
|
||||
h_idxz[idxz_count].mb2max = (2 * mb - j - (2 * h_idxz[idxz_count].mb1min - j1) + j2) / 2;
|
||||
h_idxz[idxz_count].nb = MIN(j1, (2 * mb - j + j2 + j1) / 2) - h_idxz[idxz_count].mb1min + 1;
|
||||
h_idxz(idxz_count,0) = j1;
|
||||
h_idxz(idxz_count,1) = j2;
|
||||
h_idxz(idxz_count,2) = j;
|
||||
h_idxz(idxz_count,3) = MAX(0, (2 * ma - j - j2 + j1) / 2);
|
||||
h_idxz(idxz_count,4) = (2 * ma - j - (2 * h_idxz(idxz_count,3) - j1) + j2) / 2;
|
||||
h_idxz(idxz_count,5) = MAX(0, (2 * mb - j - j2 + j1) / 2);
|
||||
h_idxz(idxz_count,6) = (2 * mb - j - (2 * h_idxz(idxz_count,5) - j1) + j2) / 2;
|
||||
h_idxz(idxz_count,7) = MIN(j1, (2 * ma - j + j2 + j1) / 2) - h_idxz(idxz_count,3) + 1;
|
||||
h_idxz(idxz_count,8) = MIN(j1, (2 * mb - j + j2 + j1) / 2) - h_idxz(idxz_count,5) + 1;
|
||||
|
||||
// apply to z(j1,j2,j,ma,mb) to unique element of y(j)
|
||||
|
||||
const int jju = h_idxu_block[j] + (j+1)*mb + ma;
|
||||
h_idxz[idxz_count].jju = jju;
|
||||
h_idxz(idxz_count,9) = jju;
|
||||
|
||||
idxz_count++;
|
||||
}
|
||||
@ -225,11 +225,13 @@ void SNAKokkos<DeviceType>::grow_rij(int newnatom, int newnmax)
|
||||
dedr = t_sna_3d("sna:dedr",natom,nmax,3);
|
||||
|
||||
blist = t_sna_2d("sna:blist",natom,idxb_max);
|
||||
ulisttot = t_sna_2c_cpu("sna:ulisttot",natom,idxu_max);
|
||||
ulisttot = t_sna_2c("sna:ulisttot",natom,idxu_max);
|
||||
if (!Kokkos::Impl::is_same<typename DeviceType::array_layout,Kokkos::LayoutRight>::value)
|
||||
ulisttot_lr = t_sna_2c_lr("sna:ulisttot_lr",natom,idxu_max);
|
||||
zlist = t_sna_2c("sna:zlist",natom,idxz_max);
|
||||
|
||||
ulist = t_sna_3c("sna:ulist",natom,nmax,idxu_max);
|
||||
ylist = t_sna_2c("sna:ylist",natom,idxu_max);
|
||||
ylist = t_sna_2c_lr("sna:ylist",natom,idxu_max);
|
||||
|
||||
dulist = t_sna_4c("sna:dulist",natom,nmax,idxu_max);
|
||||
}
|
||||
@ -240,15 +242,15 @@ void SNAKokkos<DeviceType>::grow_rij(int newnatom, int newnmax)
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::pre_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
|
||||
void SNAKokkos<DeviceType>::pre_ui(const int& iatom)
|
||||
{
|
||||
if(team.team_rank() == 0) {
|
||||
zero_uarraytot(team,iatom);
|
||||
//if(team.team_rank() == 0) {
|
||||
zero_uarraytot(iatom);
|
||||
//Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
addself_uarraytot(team,iatom,wself);
|
||||
addself_uarraytot(iatom,wself);
|
||||
//});
|
||||
}
|
||||
team.team_barrier();
|
||||
//}
|
||||
//team.team_barrier();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -278,50 +280,7 @@ void SNAKokkos<DeviceType>::compute_ui(const typename Kokkos::TeamPolicy<DeviceT
|
||||
z0 = r / tan(theta0);
|
||||
|
||||
compute_uarray(team, iatom, jnbor, x, y, z, z0, r);
|
||||
//Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
add_uarraytot(team, iatom, jnbor, r, wj(iatom,jnbor), rcutij(iatom,jnbor));
|
||||
//});
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_ui_orig(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnum)
|
||||
{
|
||||
double rsq, r, x, y, z, z0, theta0;
|
||||
|
||||
// utot(j,ma,mb) = 0 for all j,ma,ma
|
||||
// utot(j,ma,ma) = 1 for all j,ma
|
||||
// for j in neighbors of i:
|
||||
// compute r0 = (x,y,z,z0)
|
||||
// utot(j,ma,mb) += u(r0;j,ma,mb) for all j,ma,mb
|
||||
|
||||
if(team.team_rank() == 0) {
|
||||
zero_uarraytot(team,iatom);
|
||||
//Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
addself_uarraytot(team,iatom,wself);
|
||||
//});
|
||||
}
|
||||
team.team_barrier();
|
||||
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,jnum),
|
||||
[&] (const int& j) {
|
||||
//for(int j = 0; j < jnum; j++) {
|
||||
x = rij(iatom,j,0);
|
||||
y = rij(iatom,j,1);
|
||||
z = rij(iatom,j,2);
|
||||
rsq = x * x + y * y + z * z;
|
||||
r = sqrt(rsq);
|
||||
|
||||
theta0 = (r - rmin0) * rfac0 * MY_PI / (rcutij(iatom,j) - rmin0);
|
||||
// theta0 = (r - rmin0) * rscale0;
|
||||
z0 = r / tan(theta0);
|
||||
|
||||
compute_uarray(team, iatom, j, x, y, z, z0, r);
|
||||
//Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
add_uarraytot(team, iatom, j, r, wj(iatom,j), rcutij(iatom,j));
|
||||
//});
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -330,54 +289,52 @@ void SNAKokkos<DeviceType>::compute_ui_orig(const typename Kokkos::TeamPolicy<De
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_zi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
|
||||
void SNAKokkos<DeviceType>::compute_zi(const int& iter)
|
||||
{
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxz_max),
|
||||
[&] (const int& jjz) {
|
||||
//for(int jjz = 0; jjz < idxz_max; jjz++) {
|
||||
const int j1 = idxz[jjz].j1;
|
||||
const int j2 = idxz[jjz].j2;
|
||||
const int j = idxz[jjz].j;
|
||||
const int ma1min = idxz[jjz].ma1min;
|
||||
const int ma2max = idxz[jjz].ma2max;
|
||||
const int na = idxz[jjz].na;
|
||||
const int mb1min = idxz[jjz].mb1min;
|
||||
const int mb2max = idxz[jjz].mb2max;
|
||||
const int nb = idxz[jjz].nb;
|
||||
const int iatom = iter / idxz_max;
|
||||
const int jjz = iter % idxz_max;
|
||||
|
||||
const double* cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||
const int j1 = idxz(jjz,0);
|
||||
const int j2 = idxz(jjz,1);
|
||||
const int j = idxz(jjz,2);
|
||||
const int ma1min = idxz(jjz,3);
|
||||
const int ma2max = idxz(jjz,4);
|
||||
const int mb1min = idxz(jjz,5);
|
||||
const int mb2max = idxz(jjz,6);
|
||||
const int na = idxz(jjz,7);
|
||||
const int nb = idxz(jjz,8);
|
||||
|
||||
zlist(iatom,jjz).re = 0.0;
|
||||
zlist(iatom,jjz).im = 0.0;
|
||||
const double* cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||
|
||||
int jju1 = idxu_block[j1] + (j1+1)*mb1min;
|
||||
int jju2 = idxu_block[j2] + (j2+1)*mb2max;
|
||||
int icgb = mb1min*(j2+1) + mb2max;
|
||||
for(int ib = 0; ib < nb; ib++) {
|
||||
zlist(iatom,jjz).re = 0.0;
|
||||
zlist(iatom,jjz).im = 0.0;
|
||||
|
||||
double suma1_r = 0.0;
|
||||
double suma1_i = 0.0;
|
||||
int jju1 = idxu_block[j1] + (j1+1)*mb1min;
|
||||
int jju2 = idxu_block[j2] + (j2+1)*mb2max;
|
||||
int icgb = mb1min*(j2+1) + mb2max;
|
||||
for(int ib = 0; ib < nb; ib++) {
|
||||
|
||||
int ma1 = ma1min;
|
||||
int ma2 = ma2max;
|
||||
int icga = ma1min*(j2+1) + ma2max;
|
||||
for(int ia = 0; ia < na; ia++) {
|
||||
suma1_r += cgblock[icga] * (ulisttot(iatom,jju1+ma1).re * ulisttot(iatom,jju2+ma2).re - ulisttot(iatom,jju1+ma1).im * ulisttot(iatom,jju2+ma2).im);
|
||||
suma1_i += cgblock[icga] * (ulisttot(iatom,jju1+ma1).re * ulisttot(iatom,jju2+ma2).im + ulisttot(iatom,jju1+ma1).im * ulisttot(iatom,jju2+ma2).re);
|
||||
ma1++;
|
||||
ma2--;
|
||||
icga += j2;
|
||||
} // end loop over ia
|
||||
double suma1_r = 0.0;
|
||||
double suma1_i = 0.0;
|
||||
|
||||
zlist(iatom,jjz).re += cgblock[icgb] * suma1_r;
|
||||
zlist(iatom,jjz).im += cgblock[icgb] * suma1_i;
|
||||
int ma1 = ma1min;
|
||||
int ma2 = ma2max;
|
||||
int icga = ma1min*(j2+1) + ma2max;
|
||||
for(int ia = 0; ia < na; ia++) {
|
||||
suma1_r += cgblock[icga] * (ulisttot(iatom,jju1+ma1).re * ulisttot(iatom,jju2+ma2).re - ulisttot(iatom,jju1+ma1).im * ulisttot(iatom,jju2+ma2).im);
|
||||
suma1_i += cgblock[icga] * (ulisttot(iatom,jju1+ma1).re * ulisttot(iatom,jju2+ma2).im + ulisttot(iatom,jju1+ma1).im * ulisttot(iatom,jju2+ma2).re);
|
||||
ma1++;
|
||||
ma2--;
|
||||
icga += j2;
|
||||
} // end loop over ia
|
||||
|
||||
jju1 += j1+1;
|
||||
jju2 -= j2+1;
|
||||
icgb += j2;
|
||||
} // end loop over ib
|
||||
zlist(iatom,jjz).re += cgblock[icgb] * suma1_r;
|
||||
zlist(iatom,jjz).im += cgblock[icgb] * suma1_i;
|
||||
|
||||
}); // end loop over jjz
|
||||
jju1 += j1+1;
|
||||
jju2 -= j2+1;
|
||||
icgb += j2;
|
||||
} // end loop over ib
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -386,102 +343,94 @@ void SNAKokkos<DeviceType>::compute_zi(const typename Kokkos::TeamPolicy<DeviceT
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_yi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom,
|
||||
void SNAKokkos<DeviceType>::zero_yi(const int& iatom)
|
||||
{
|
||||
for (int j = 0; j < idxu_max; j++)
|
||||
ylist(iatom,j) = {0.0,0.0};
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute Yi from Ui without storing Zi, looping over zlist indices
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::compute_yi(int iter,
|
||||
const Kokkos::View<F_FLOAT**, DeviceType> &beta)
|
||||
{
|
||||
double betaj;
|
||||
const int ii = iatom;
|
||||
const int iatom = iter / idxz_max;
|
||||
const int jjz = iter % idxz_max;
|
||||
|
||||
{
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,ylist.extent(1)),
|
||||
[&] (const int& i) {
|
||||
ylist(iatom,i).re = 0.0;
|
||||
ylist(iatom,i).im = 0.0;
|
||||
});
|
||||
}
|
||||
const int j1 = idxz(jjz,0);
|
||||
const int j2 = idxz(jjz,1);
|
||||
const int j = idxz(jjz,2);
|
||||
const int ma1min = idxz(jjz,3);
|
||||
const int ma2max = idxz(jjz,4);
|
||||
const int mb1min = idxz(jjz,5);
|
||||
const int mb2max = idxz(jjz,6);
|
||||
const int na = idxz(jjz,7);
|
||||
const int nb = idxz(jjz,8);
|
||||
const int jju = idxz(jjz,9);
|
||||
|
||||
//int flopsum = 0;
|
||||
const double* cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||
//int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2;
|
||||
//int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2;
|
||||
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxz_max),
|
||||
[&] (const int& jjz) {
|
||||
//for(int jjz = 0; jjz < idxz_max; jjz++) {
|
||||
const int j1 = idxz[jjz].j1;
|
||||
const int j2 = idxz[jjz].j2;
|
||||
const int j = idxz[jjz].j;
|
||||
const int ma1min = idxz[jjz].ma1min;
|
||||
const int ma2max = idxz[jjz].ma2max;
|
||||
const int na = idxz[jjz].na;
|
||||
const int mb1min = idxz[jjz].mb1min;
|
||||
const int mb2max = idxz[jjz].mb2max;
|
||||
const int nb = idxz[jjz].nb;
|
||||
double ztmp_r = 0.0;
|
||||
double ztmp_i = 0.0;
|
||||
|
||||
const double* cgblock = cglist.data() + idxcg_block(j1,j2,j);
|
||||
//int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2;
|
||||
//int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2;
|
||||
int jju1 = idxu_block[j1] + (j1+1)*mb1min;
|
||||
int jju2 = idxu_block[j2] + (j2+1)*mb2max;
|
||||
int icgb = mb1min*(j2+1) + mb2max;
|
||||
for(int ib = 0; ib < nb; ib++) {
|
||||
|
||||
double ztmp_r = 0.0;
|
||||
double ztmp_i = 0.0;
|
||||
double suma1_r = 0.0;
|
||||
double suma1_i = 0.0;
|
||||
|
||||
int jju1 = idxu_block[j1] + (j1+1)*mb1min;
|
||||
int jju2 = idxu_block[j2] + (j2+1)*mb2max;
|
||||
int icgb = mb1min*(j2+1) + mb2max;
|
||||
for(int ib = 0; ib < nb; ib++) {
|
||||
int ma1 = ma1min;
|
||||
int ma2 = ma2max;
|
||||
int icga = ma1min*(j2+1) + ma2max;
|
||||
|
||||
double suma1_r = 0.0;
|
||||
double suma1_i = 0.0;
|
||||
for(int ia = 0; ia < na; ia++) {
|
||||
suma1_r += cgblock[icga] * (ulisttot_lr(iatom,jju1+ma1).re * ulisttot_lr(iatom,jju2+ma2).re - ulisttot_lr(iatom,jju1+ma1).im * ulisttot_lr(iatom,jju2+ma2).im);
|
||||
suma1_i += cgblock[icga] * (ulisttot_lr(iatom,jju1+ma1).re * ulisttot_lr(iatom,jju2+ma2).im + ulisttot_lr(iatom,jju1+ma1).im * ulisttot_lr(iatom,jju2+ma2).re);
|
||||
ma1++;
|
||||
ma2--;
|
||||
icga += j2;
|
||||
} // end loop over ia
|
||||
|
||||
int ma1 = ma1min;
|
||||
int ma2 = ma2max;
|
||||
int icga = ma1min*(j2+1) + ma2max;
|
||||
ztmp_r += cgblock[icgb] * suma1_r;
|
||||
ztmp_i += cgblock[icgb] * suma1_i;
|
||||
jju1 += j1+1;
|
||||
jju2 -= j2+1;
|
||||
icgb += j2;
|
||||
} // end loop over ib
|
||||
|
||||
for(int ia = 0; ia < na; ia++) {
|
||||
suma1_r += cgblock[icga] * (ulisttot(iatom,jju1+ma1).re * ulisttot(iatom,jju2+ma2).re - ulisttot(iatom,jju1+ma1).im * ulisttot(iatom,jju2+ma2).im);
|
||||
suma1_i += cgblock[icga] * (ulisttot(iatom,jju1+ma1).re * ulisttot(iatom,jju2+ma2).im + ulisttot(iatom,jju1+ma1).im * ulisttot(iatom,jju2+ma2).re);
|
||||
//flopsum += 10;
|
||||
ma1++;
|
||||
ma2--;
|
||||
icga += j2;
|
||||
} // end loop over ia
|
||||
|
||||
ztmp_r += cgblock[icgb] * suma1_r;
|
||||
ztmp_i += cgblock[icgb] * suma1_i;
|
||||
jju1 += j1+1;
|
||||
jju2 -= j2+1;
|
||||
icgb += j2;
|
||||
} // end loop over ib
|
||||
|
||||
// apply to z(j1,j2,j,ma,mb) to unique element of y(j)
|
||||
// find right y_list[jju] and beta(ii,jjb) entries
|
||||
// multiply and divide by j+1 factors
|
||||
// account for multiplicity of 1, 2, or 3
|
||||
|
||||
const int jju = idxz[jjz].jju;
|
||||
// apply to z(j1,j2,j,ma,mb) to unique element of y(j)
|
||||
// find right y_list[jju] and beta(iatom,jjb) entries
|
||||
// multiply and divide by j+1 factors
|
||||
// account for multiplicity of 1, 2, or 3
|
||||
|
||||
// pick out right beta value
|
||||
|
||||
if (j >= j1) {
|
||||
const int jjb = idxb_block(j1,j2,j);
|
||||
if (j1 == j) {
|
||||
if (j2 == j) betaj = 3*beta(ii,jjb);
|
||||
else betaj = 2*beta(ii,jjb);
|
||||
} else betaj = beta(ii,jjb);
|
||||
} else if (j >= j2) {
|
||||
const int jjb = idxb_block(j,j2,j1);
|
||||
if (j2 == j) betaj = 2*beta(ii,jjb)*(j1+1)/(j+1.0);
|
||||
else betaj = beta(ii,jjb)*(j1+1)/(j+1.0);
|
||||
} else {
|
||||
const int jjb = idxb_block(j2,j,j1);
|
||||
betaj = beta(ii,jjb)*(j1+1)/(j+1.0);
|
||||
}
|
||||
if (j >= j1) {
|
||||
const int jjb = idxb_block(j1,j2,j);
|
||||
if (j1 == j) {
|
||||
if (j2 == j) betaj = 3*beta(iatom,jjb);
|
||||
else betaj = 2*beta(iatom,jjb);
|
||||
} else betaj = beta(iatom,jjb);
|
||||
} else if (j >= j2) {
|
||||
const int jjb = idxb_block(j,j2,j1);
|
||||
if (j2 == j) betaj = 2*beta(iatom,jjb)*(j1+1)/(j+1.0);
|
||||
else betaj = beta(iatom,jjb)*(j1+1)/(j+1.0);
|
||||
} else {
|
||||
const int jjb = idxb_block(j2,j,j1);
|
||||
betaj = beta(iatom,jjb)*(j1+1)/(j+1.0);
|
||||
}
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||
Kokkos::atomic_add(&(ylist(iatom,jju).re), betaj*ztmp_r);
|
||||
Kokkos::atomic_add(&(ylist(iatom,jju).im), betaj*ztmp_i);
|
||||
});
|
||||
|
||||
}); // end loop over jjz
|
||||
|
||||
//printf("sum %i\n",flopsum);
|
||||
Kokkos::atomic_add(&(ylist(iatom,jju).re), betaj*ztmp_r);
|
||||
Kokkos::atomic_add(&(ylist(iatom,jju).im), betaj*ztmp_i);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -556,9 +505,9 @@ void SNAKokkos<DeviceType>::compute_bi(const typename Kokkos::TeamPolicy<DeviceT
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxb_max),
|
||||
[&] (const int& jjb) {
|
||||
//for(int jjb = 0; jjb < idxb_max; jjb++) {
|
||||
const int j1 = idxb[jjb].j1;
|
||||
const int j2 = idxb[jjb].j2;
|
||||
const int j = idxb[jjb].j;
|
||||
const int j1 = idxb(jjb,0);
|
||||
const int j2 = idxb(jjb,1);
|
||||
const int j = idxb(jjb,2);
|
||||
|
||||
int jjz = idxz_block(j1,j2,j);
|
||||
int jju = idxu_block[j];
|
||||
@ -648,14 +597,16 @@ void SNAKokkos<DeviceType>::compute_duidrj(const typename Kokkos::TeamPolicy<Dev
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::zero_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
|
||||
void SNAKokkos<DeviceType>::zero_uarraytot(const int& iatom)
|
||||
{
|
||||
{
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot.extent(1)),
|
||||
[&] (const int& i) {
|
||||
//Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot.extent(1)),
|
||||
// [&] (const int& i) {
|
||||
for (int i = 0; i < ulisttot.extent(1); i++) {
|
||||
ulisttot(iatom,i).re = 0.0;
|
||||
ulisttot(iatom,i).im = 0.0;
|
||||
});
|
||||
}
|
||||
//});
|
||||
}
|
||||
}
|
||||
|
||||
@ -663,18 +614,18 @@ void SNAKokkos<DeviceType>::zero_uarraytot(const typename Kokkos::TeamPolicy<Dev
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType>::addself_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, double wself_in)
|
||||
void SNAKokkos<DeviceType>::addself_uarraytot(const int& iatom, const double& wself_in)
|
||||
{
|
||||
Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,twojmax+1),
|
||||
[&] (const int& j) {
|
||||
//for (int j = 0; j <= twojmax; j++)
|
||||
//Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,twojmax+1),
|
||||
// [&] (const int& j) {
|
||||
for (int j = 0; j <= twojmax; j++) {
|
||||
int jju = idxu_block[j];
|
||||
for (int ma = 0; ma <= j; ma++) {
|
||||
ulisttot(iatom,jju).re = wself_in;
|
||||
ulisttot(iatom,jju).im = 0.0;
|
||||
jju += j+2;
|
||||
}
|
||||
});
|
||||
}//});
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -786,6 +737,12 @@ void SNAKokkos<DeviceType>::compute_uarray(const typename Kokkos::TeamPolicy<Dev
|
||||
}
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
void SNAKokkos<DeviceType>::transpose_ulisttot()
|
||||
{
|
||||
UlisttotHelper<typename DeviceType::array_layout,decltype(ulisttot_lr),decltype(ulisttot)>::transpose(ulisttot_lr,ulisttot);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute derivatives of Wigner U-functions for one neighbor
|
||||
see comments in compute_uarray()
|
||||
@ -1318,6 +1275,8 @@ double SNAKokkos<DeviceType>::memory_usage()
|
||||
|
||||
bytes += natom * idxu_max * sizeof(double) * 2; // ulist
|
||||
bytes += natom * idxu_max * sizeof(double) * 2; // ulisttot
|
||||
if (!Kokkos::Impl::is_same<typename DeviceType::array_layout,Kokkos::LayoutRight>::value)
|
||||
bytes += natom * idxu_max * sizeof(double) * 2; // ulisttot_lr
|
||||
bytes += natom * idxu_max * 3 * sizeof(double) * 2; // dulist
|
||||
|
||||
bytes += natom * idxz_max * sizeof(double) * 2; // zlist
|
||||
@ -1329,8 +1288,8 @@ double SNAKokkos<DeviceType>::memory_usage()
|
||||
bytes += jdim * jdim * jdim * sizeof(int); // idxz_block
|
||||
bytes += jdim * jdim * jdim * sizeof(int); // idxb_block
|
||||
|
||||
bytes += idxz_max * sizeof(SNAKK_ZINDICES); // idxz
|
||||
bytes += idxb_max * sizeof(SNAKK_BINDICES); // idxb
|
||||
bytes += idxz_max * 10 * sizeof(int); // idxz
|
||||
bytes += idxb_max * 3 * sizeof(int); // idxb
|
||||
|
||||
bytes += jdim * sizeof(double); // bzero
|
||||
|
||||
|
||||
Reference in New Issue
Block a user