WIP
This commit is contained in:
@ -31,7 +31,10 @@ PairStyle(snap/kk/host,PairSNAPKokkos<LMPHostType>)
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
struct TagPairSNAP{};
|
||||
struct TagPairSNAPCompute{};
|
||||
|
||||
struct TagPairSNAPBeta{};
|
||||
struct TagPairSNAPBispectrum{};
|
||||
|
||||
template<class DeviceType>
|
||||
class PairSNAPKokkos : public PairSNAP {
|
||||
@ -53,11 +56,17 @@ public:
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team) const;
|
||||
void operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team) const;
|
||||
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT&) const;
|
||||
void operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPBispectrum,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBispectrum>::member_type& team) const;
|
||||
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -82,10 +91,14 @@ protected:
|
||||
SNAKokkos<DeviceType> snaKK;
|
||||
|
||||
// How much parallelism to use within an interaction
|
||||
int vector_length;
|
||||
int vector_length,team_size;
|
||||
int team_scratch_size;
|
||||
int thread_scratch_size;
|
||||
|
||||
int eflag,vflag;
|
||||
|
||||
void compute_beta();
|
||||
void compute_bispectrum();
|
||||
void allocate();
|
||||
//void read_files(char *, char *);
|
||||
/*template<class DeviceType>
|
||||
@ -117,7 +130,9 @@ inline double dist2(double* x,double* y);
|
||||
Kokkos::View<F_FLOAT*, DeviceType> d_radelem; // element radii
|
||||
Kokkos::View<F_FLOAT*, DeviceType> d_wjelem; // elements weights
|
||||
Kokkos::View<F_FLOAT**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
|
||||
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
|
||||
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
|
||||
Kokkos::View<F_FLOAT**, DeviceType> d_beta; // betas for all atoms in list
|
||||
Kokkos::View<F_FLOAT**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
|
||||
|
||||
typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
|
||||
tdual_fparams k_cutsq;
|
||||
|
||||
@ -186,31 +186,45 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
|
||||
snaKK.nmax = max_neighs;
|
||||
|
||||
T_INT team_scratch_size = snaKK.size_team_scratch_arrays();
|
||||
T_INT thread_scratch_size = snaKK.size_thread_scratch_arrays();
|
||||
team_scratch_size = snaKK.size_team_scratch_arrays();
|
||||
thread_scratch_size = snaKK.size_thread_scratch_arrays();
|
||||
|
||||
//printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024);
|
||||
int team_size_max = Kokkos::TeamPolicy<DeviceType>::team_size_max(*this);
|
||||
int vector_length = 8;
|
||||
vector_length = 8;
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
int team_size = 32;//max_neighs;
|
||||
team_size = 32;//max_neighs;
|
||||
if (team_size*vector_length > team_size_max)
|
||||
team_size = team_size_max/vector_length;
|
||||
#else
|
||||
int team_size = 1;
|
||||
team_size = 1;
|
||||
#endif
|
||||
|
||||
if (beta_max < list->inum) { // TODO: no init
|
||||
d_beta = Kokkos::View<F_FLOAT**, DeviceType>("PairSNAPKokkos:beta",
|
||||
list->inum,ncoeff);
|
||||
d_bispectrum = Kokkos::View<F_FLOAT**, DeviceType>("PairSNAPKokkos:bispectrum",
|
||||
list->inum,ncoeff);
|
||||
beta_max = list->inum;
|
||||
}
|
||||
|
||||
// compute dE_i/dB_i = beta_i for all i in list
|
||||
|
||||
if (quadraticflag || eflag)
|
||||
compute_bispectrum();
|
||||
compute_beta();
|
||||
|
||||
EV_FLOAT ev;
|
||||
|
||||
if (eflag) {
|
||||
if (neighflag == HALF) {
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<HALF,1> > policy(inum,team_size,vector_length);
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALF,1> > policy(inum,team_size,vector_length);
|
||||
Kokkos::parallel_reduce(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
,*this,ev);
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<HALFTHREAD,1> > policy(inum,team_size,vector_length);
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALFTHREAD,1> > policy(inum,team_size,vector_length);
|
||||
Kokkos::parallel_reduce(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
@ -218,13 +232,13 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
}
|
||||
} else {
|
||||
if (neighflag == HALF) {
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<HALF,0> > policy(inum,team_size,vector_length);
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALF,0> > policy(inum,team_size,vector_length);
|
||||
Kokkos::parallel_for(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
,*this);
|
||||
} else if (neighflag == HALFTHREAD) {
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<HALFTHREAD,0> > policy(inum,team_size,vector_length);
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<HALFTHREAD,0> > policy(inum,team_size,vector_length);
|
||||
Kokkos::parallel_for(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
@ -232,11 +246,6 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
}
|
||||
}
|
||||
|
||||
//static int step =0;
|
||||
//step++;
|
||||
//if (step%10==0)
|
||||
// printf(" %e %e %e %e %e (%e %e): %e\n",t1,t2,t3,t4,t5,t6,t7,t1+t2+t3+t4+t5);
|
||||
|
||||
if (need_dup)
|
||||
Kokkos::Experimental::contribute(f, dup_f);
|
||||
|
||||
@ -275,6 +284,153 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute beta
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairSNAPKokkos<DeviceType>::compute_beta()
|
||||
{
|
||||
// TODO: use RangePolicy instead, or thread over ncoeff?
|
||||
int inum = list->inum;
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta> policy(inum,team_size,vector_length);
|
||||
Kokkos::parallel_for(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
,*this);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBeta>::member_type& team) const {
|
||||
|
||||
const int ii = team.league_rank();
|
||||
const int i = d_ilist[ii];
|
||||
const int itype = type[i];
|
||||
const int ielem = map[itype];
|
||||
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
d_coeffi(d_coeffelem,ielem,Kokkos::ALL);
|
||||
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
|
||||
d_beta(ii,icoeff) = d_coeffi[icoeff+1];
|
||||
|
||||
if (quadraticflag) {
|
||||
int k = ncoeff+1;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
double bveci = d_bispectrum(ii,icoeff);
|
||||
d_beta(ii,icoeff) += d_coeffi[k]*bveci;
|
||||
k++;
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
double bvecj = d_bispectrum(ii,jcoeff);
|
||||
d_beta(ii,icoeff) += d_coeffi[k]*bvecj;
|
||||
d_beta(ii,jcoeff) += d_coeffi[k]*bveci;
|
||||
k++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute bispectrum
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void PairSNAPKokkos<DeviceType>::compute_bispectrum()
|
||||
{
|
||||
int inum = list->inum;
|
||||
typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBispectrum> policy(inum,team_size,vector_length);
|
||||
Kokkos::parallel_for(policy
|
||||
.set_scratch_size(1,Kokkos::PerThread(thread_scratch_size))
|
||||
.set_scratch_size(1,Kokkos::PerTeam(team_scratch_size))
|
||||
,*this);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBispectrum,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPBispectrum>::member_type& team) const {
|
||||
|
||||
const int ii = team.league_rank();
|
||||
const int i = d_ilist[ii];
|
||||
SNAKokkos<DeviceType> my_sna(snaKK,team);
|
||||
const double xtmp = x(i,0);
|
||||
const double ytmp = x(i,1);
|
||||
const double ztmp = x(i,2);
|
||||
const int itype = type[i];
|
||||
const int ielem = d_map[itype];
|
||||
const double radi = d_radelem[ielem];
|
||||
|
||||
const int num_neighs = d_numneigh[i];
|
||||
|
||||
// rij[][3] = displacements between atom I and those neighbors
|
||||
// inside = indices of neighbors of I within cutoff
|
||||
// wj = weights for neighbors of I within cutoff
|
||||
// rcutij = cutoffs for neighbors of I within cutoff
|
||||
// note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
|
||||
|
||||
int ninside = 0;
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,num_neighs),
|
||||
[&] (const int jj, int& count) {
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
T_INT j = d_neighbors(i,jj);
|
||||
const F_FLOAT dx = x(j,0) - xtmp;
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
|
||||
const int jtype = type(j);
|
||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
||||
const int elem_j = d_map[jtype];
|
||||
|
||||
if ( rsq < rnd_cutsq(itype,jtype) )
|
||||
count++;
|
||||
});
|
||||
},ninside);
|
||||
|
||||
if (team.team_rank() == 0)
|
||||
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs),
|
||||
[&] (const int jj, int& offset, bool final) {
|
||||
//for (int jj = 0; jj < num_neighs; jj++) {
|
||||
T_INT j = d_neighbors(i,jj);
|
||||
const F_FLOAT dx = x(j,0) - xtmp;
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
|
||||
const int jtype = type(j);
|
||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
||||
const int elem_j = d_map[jtype];
|
||||
|
||||
if ( rsq < rnd_cutsq(itype,jtype) ) {
|
||||
if (final) {
|
||||
my_sna.rij(offset,0) = dx;
|
||||
my_sna.rij(offset,1) = dy;
|
||||
my_sna.rij(offset,2) = dz;
|
||||
my_sna.inside[offset] = j;
|
||||
my_sna.wj[offset] = d_wjelem[elem_j];
|
||||
my_sna.rcutij[offset] = (radi + d_radelem[elem_j])*rcutfac;
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
});
|
||||
team.team_barrier();
|
||||
|
||||
// compute Ui, Zi, and Bi for atom I
|
||||
|
||||
my_sna.compute_ui(team,ninside);
|
||||
team.team_barrier();
|
||||
|
||||
my_sna.compute_zi(team);
|
||||
team.team_barrier();
|
||||
|
||||
my_sna.compute_bi(team);
|
||||
team.team_barrier();
|
||||
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
|
||||
d_bispectrum(ii,icoeff) = my_sna.blist[icoeff];
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
@ -354,7 +510,7 @@ void PairSNAPKokkos<DeviceType>::coeff(int narg, char **arg)
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team, EV_FLOAT& ev) const {
|
||||
|
||||
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||
|
||||
@ -364,12 +520,12 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
|
||||
const int ii = team.league_rank();
|
||||
const int i = d_ilist[ii];
|
||||
SNAKokkos<DeviceType> my_sna(snaKK,team);
|
||||
const double x_i = x(i,0);
|
||||
const double y_i = x(i,1);
|
||||
const double z_i = x(i,2);
|
||||
const int type_i = type[i];
|
||||
const int elem_i = d_map[type_i];
|
||||
const double radi = d_radelem[elem_i];
|
||||
const double xtmp = x(i,0);
|
||||
const double ytmp = x(i,1);
|
||||
const double ztmp = x(i,2);
|
||||
const int itype = type[i];
|
||||
const int ielem = d_map[itype];
|
||||
const double radi = d_radelem[ielem];
|
||||
|
||||
const int num_neighs = d_numneigh[i];
|
||||
|
||||
@ -379,41 +535,38 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
|
||||
// rcutij = cutoffs for neighbors of I within cutoff
|
||||
// note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
|
||||
|
||||
//Kokkos::Timer timer;
|
||||
int ninside = 0;
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,num_neighs),
|
||||
[&] (const int jj, int& count) {
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
T_INT j = d_neighbors(i,jj);
|
||||
const F_FLOAT dx = x(j,0) - x_i;
|
||||
const F_FLOAT dy = x(j,1) - y_i;
|
||||
const F_FLOAT dz = x(j,2) - z_i;
|
||||
const F_FLOAT dx = x(j,0) - xtmp;
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
|
||||
const int type_j = type(j);
|
||||
const int jtype = type(j);
|
||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
||||
const int elem_j = d_map[type_j];
|
||||
const int elem_j = d_map[jtype];
|
||||
|
||||
if ( rsq < rnd_cutsq(type_i,type_j) )
|
||||
if ( rsq < rnd_cutsq(itype,jtype) )
|
||||
count++;
|
||||
});
|
||||
},ninside);
|
||||
|
||||
//t1 += timer.seconds(); timer.reset();
|
||||
|
||||
if (team.team_rank() == 0)
|
||||
Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs),
|
||||
[&] (const int jj, int& offset, bool final){
|
||||
[&] (const int jj, int& offset, bool final) {
|
||||
//for (int jj = 0; jj < num_neighs; jj++) {
|
||||
T_INT j = d_neighbors(i,jj);
|
||||
const F_FLOAT dx = x(j,0) - x_i;
|
||||
const F_FLOAT dy = x(j,1) - y_i;
|
||||
const F_FLOAT dz = x(j,2) - z_i;
|
||||
const F_FLOAT dx = x(j,0) - xtmp;
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
|
||||
const int type_j = type(j);
|
||||
const int jtype = type(j);
|
||||
const F_FLOAT rsq = dx*dx + dy*dy + dz*dz;
|
||||
const int elem_j = d_map[type_j];
|
||||
const int elem_j = d_map[jtype];
|
||||
|
||||
if ( rsq < rnd_cutsq(type_i,type_j) ) {
|
||||
if ( rsq < rnd_cutsq(itype,jtype) ) {
|
||||
if (final) {
|
||||
my_sna.rij(offset,0) = dx;
|
||||
my_sna.rij(offset,1) = dy;
|
||||
@ -425,157 +578,85 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
|
||||
offset++;
|
||||
}
|
||||
});
|
||||
|
||||
//t2 += timer.seconds(); timer.reset();
|
||||
|
||||
team.team_barrier();
|
||||
// compute Ui, Zi, and Bi for atom I
|
||||
|
||||
// compute Ui, Yi for atom I
|
||||
|
||||
my_sna.compute_ui(team,ninside);
|
||||
//t3 += timer.seconds(); timer.reset();
|
||||
team.team_barrier();
|
||||
my_sna.compute_zi(team);
|
||||
//t4 += timer.seconds(); timer.reset();
|
||||
team.team_barrier();
|
||||
|
||||
if (quadraticflag) {
|
||||
my_sna.compute_bi(team);
|
||||
team.team_barrier();
|
||||
my_sna.copy_bi2bvec(team);
|
||||
team.team_barrier();
|
||||
}
|
||||
|
||||
// for neighbors of I within cutoff:
|
||||
// compute dUi/drj and dBi/drj
|
||||
// Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj
|
||||
// compute Fij = dEi/dRj = -dEi/dRi
|
||||
// add to Fi, subtract from Fj
|
||||
|
||||
my_sna.compute_yi(team,d_beta,ii);
|
||||
team.team_barrier();
|
||||
|
||||
Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
|
||||
d_coeffi(d_coeffelem,elem_i,Kokkos::ALL);
|
||||
d_coeffi(d_coeffelem,ielem,Kokkos::ALL);
|
||||
|
||||
Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside),
|
||||
[&] (const int jj) {
|
||||
//for (int jj = 0; jj < ninside; jj++) {
|
||||
int j = my_sna.inside[jj];
|
||||
|
||||
//Kokkos::Timer timer2;
|
||||
my_sna.compute_duidrj(team,&my_sna.rij(jj,0),
|
||||
my_sna.wj[jj],my_sna.rcutij[jj]);
|
||||
//t6 += timer2.seconds(); timer2.reset();
|
||||
my_sna.compute_dbidrj(team);
|
||||
//t7 += timer2.seconds(); timer2.reset();
|
||||
my_sna.copy_dbi2dbvec(team);
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
F_FLOAT fij[3];
|
||||
|
||||
fij[0] = 0.0;
|
||||
fij[1] = 0.0;
|
||||
fij[2] = 0.0;
|
||||
|
||||
// linear contributions
|
||||
|
||||
for (int k = 1; k <= ncoeff; k++) {
|
||||
double bgb = d_coeffi[k];
|
||||
fij[0] += bgb*my_sna.dbvec(k-1,0);
|
||||
fij[1] += bgb*my_sna.dbvec(k-1,1);
|
||||
fij[2] += bgb*my_sna.dbvec(k-1,2);
|
||||
}
|
||||
|
||||
if (quadraticflag) {
|
||||
|
||||
int k = ncoeff+1;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
double bveci = my_sna.bvec[icoeff];
|
||||
double fack = d_coeffi[k]*bveci;
|
||||
double dbvecix = my_sna.dbvec(icoeff,0);
|
||||
double dbveciy = my_sna.dbvec(icoeff,1);
|
||||
double dbveciz = my_sna.dbvec(icoeff,2);
|
||||
fij[0] += fack*dbvecix;
|
||||
fij[1] += fack*dbveciy;
|
||||
fij[2] += fack*dbveciz;
|
||||
k++;
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
double facki = d_coeffi[k]*bveci;
|
||||
double fackj = d_coeffi[k]*my_sna.bvec[jcoeff];
|
||||
fij[0] += facki*my_sna.dbvec(jcoeff,0)+fackj*dbvecix;
|
||||
fij[1] += facki*my_sna.dbvec(jcoeff,1)+fackj*dbveciy;
|
||||
fij[2] += facki*my_sna.dbvec(jcoeff,2)+fackj*dbveciz;
|
||||
k++;
|
||||
F_FLOAT fij[3];
|
||||
my_sna.compute_deidrj(team,fij);
|
||||
|
||||
a_f(i,0) += fij[0];
|
||||
a_f(i,1) += fij[1];
|
||||
a_f(i,2) += fij[2];
|
||||
a_f(j,0) -= fij[0];
|
||||
a_f(j,1) -= fij[1];
|
||||
a_f(j,2) -= fij[2];
|
||||
|
||||
// tally global and per-atom virial contribution
|
||||
|
||||
if (EVFLAG) {
|
||||
if (vflag_either) {
|
||||
v_tally_xyz<NEIGHFLAG>(ev,i,j,
|
||||
fij[0],fij[1],fij[2],
|
||||
-my_sna.rij(jj,0),-my_sna.rij(jj,1),
|
||||
-my_sna.rij(jj,2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Hard-coded ZBL potential
|
||||
//const double dx = my_sna.rij(jj,0);
|
||||
//const double dy = my_sna.rij(jj,1);
|
||||
//const double dz = my_sna.rij(jj,2);
|
||||
//const double fdivr = -1.5e6/pow(dx*dx + dy*dy + dz*dz,7.0);
|
||||
//fij[0] += dx*fdivr;
|
||||
//fij[1] += dy*fdivr;
|
||||
//fij[2] += dz*fdivr;
|
||||
|
||||
//OK
|
||||
//printf("%lf %lf %lf %lf %lf %lf %lf %lf %lf SNAP-COMPARE: FIJ\n"
|
||||
// ,x(i,0),x(i,1),x(i,2),x(j,0),x(j,1),x(j,2),fij[0],fij[1],fij[2] );
|
||||
a_f(i,0) += fij[0];
|
||||
a_f(i,1) += fij[1];
|
||||
a_f(i,2) += fij[2];
|
||||
a_f(j,0) -= fij[0];
|
||||
a_f(j,1) -= fij[1];
|
||||
a_f(j,2) -= fij[2];
|
||||
|
||||
// tally global and per-atom virial contribution
|
||||
|
||||
if (EVFLAG) {
|
||||
if (vflag_either) {
|
||||
v_tally_xyz<NEIGHFLAG>(ev,i,j,
|
||||
fij[0],fij[1],fij[2],
|
||||
-my_sna.rij(jj,0),-my_sna.rij(jj,1),
|
||||
-my_sna.rij(jj,2));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
});
|
||||
});
|
||||
//t5 += timer.seconds(); timer.reset();
|
||||
|
||||
// tally energy contribution
|
||||
|
||||
if (EVFLAG) {
|
||||
if (eflag_either) {
|
||||
|
||||
if (!quadraticflag) {
|
||||
my_sna.compute_bi(team);
|
||||
team.team_barrier();
|
||||
my_sna.copy_bi2bvec(team);
|
||||
team.team_barrier();
|
||||
}
|
||||
|
||||
// E = beta.B + 0.5*B^t.alpha.B
|
||||
// coeff[k] = beta[k-1] or
|
||||
// coeff[k] = alpha_ii or
|
||||
// coeff[k] = alpha_ij = alpha_ji, j != i
|
||||
|
||||
Kokkos::single(Kokkos::PerTeam(team), [&] () {
|
||||
|
||||
// evdwl = energy of atom I, sum over coeffs_k * Bi_k
|
||||
|
||||
double evdwl = d_coeffi[0];
|
||||
|
||||
|
||||
// E = beta.B + 0.5*B^t.alpha.B
|
||||
|
||||
// linear contributions
|
||||
// could use thread vector range on this loop
|
||||
|
||||
for (int k = 1; k <= ncoeff; k++)
|
||||
evdwl += d_coeffi[k]*my_sna.bvec[k-1];
|
||||
|
||||
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
|
||||
evdwl += d_coeffi[icoeff+1]*d_bispectrum(ii,icoeff);
|
||||
|
||||
// quadratic contributions
|
||||
|
||||
|
||||
if (quadraticflag) {
|
||||
int k = ncoeff+1;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
double bveci = my_sna.bvec[icoeff];
|
||||
double bveci = d_bispectrum(ii,icoeff);
|
||||
evdwl += 0.5*d_coeffi[k++]*bveci*bveci;
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
evdwl += d_coeffi[k++]*bveci*my_sna.bvec[jcoeff];
|
||||
double bvecj = d_bispectrum(ii,jcoeff);
|
||||
evdwl += d_coeffi[k++]*bveci*bvecj;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -591,9 +672,9 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAP<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAP<NEIGHFLAG,EVFLAG> >::member_type& team) const {
|
||||
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPCompute<NEIGHFLAG,EVFLAG>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPCompute<NEIGHFLAG,EVFLAG> >::member_type& team) const {
|
||||
EV_FLOAT ev;
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAP<NEIGHFLAG,EVFLAG>(), team, ev);
|
||||
this->template operator()<NEIGHFLAG,EVFLAG>(TagPairSNAPCompute<NEIGHFLAG,EVFLAG>(), team, ev);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -25,7 +25,11 @@
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct SNAKK_LOOPINDICES {
|
||||
struct SNAKK_ZINDICES {
|
||||
int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju;
|
||||
};
|
||||
|
||||
struct SNAKK_BINDICES {
|
||||
int j1, j2, j;
|
||||
};
|
||||
|
||||
@ -35,9 +39,9 @@ class SNAKokkos {
|
||||
public:
|
||||
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
|
||||
typedef Kokkos::View<double*, DeviceType> t_sna_1d;
|
||||
typedef Kokkos::View<double*, Kokkos::LayoutRight, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_1d_atomic;
|
||||
typedef Kokkos::View<double**, Kokkos::LayoutRight, DeviceType> t_sna_2d;
|
||||
typedef Kokkos::View<double***, Kokkos::LayoutRight, DeviceType> t_sna_3d;
|
||||
typedef Kokkos::View<double***, Kokkos::LayoutRight, DeviceType, Kokkos::MemoryTraits<Kokkos::Atomic> > t_sna_3d_atomic;
|
||||
typedef Kokkos::View<double***[3], Kokkos::LayoutRight, DeviceType> t_sna_4d;
|
||||
typedef Kokkos::View<double**[3], Kokkos::LayoutRight, DeviceType> t_sna_3d3;
|
||||
typedef Kokkos::View<double*****, Kokkos::LayoutRight, DeviceType> t_sna_5d;
|
||||
@ -76,9 +80,10 @@ inline
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_zi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // ForceSNAP
|
||||
void compute_yi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,
|
||||
const Kokkos::View<F_FLOAT**, DeviceType> &beta, const int ii); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void copy_bi2bvec(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); //ForceSNAP
|
||||
void compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); // ForceSNAP
|
||||
|
||||
// functions for derivatives
|
||||
|
||||
@ -87,7 +92,7 @@ inline
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_dbidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); //ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void copy_dbi2dbvec(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team); //ForceSNAP
|
||||
void compute_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, double *); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double compute_sfac(double, double); // add_uarraytot, compute_duarray
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -114,37 +119,41 @@ inline
|
||||
|
||||
int twojmax, diagonalstyle;
|
||||
// Per InFlight Particle
|
||||
t_sna_3d barray;
|
||||
t_sna_3d uarraytot_r, uarraytot_i;
|
||||
t_sna_3d_atomic uarraytot_r_a, uarraytot_i_a;
|
||||
t_sna_5d zarray_r, zarray_i;
|
||||
t_sna_1d blist;
|
||||
t_sna_1d ulisttot_r, ulisttot_i;
|
||||
t_sna_1d_atomic ulisttot_r_a, ulisttot_i_a;
|
||||
t_sna_1d zlist_r, zlist_i;
|
||||
|
||||
// Per InFlight Interaction
|
||||
t_sna_3d uarray_r, uarray_i;
|
||||
|
||||
Kokkos::View<double*, Kokkos::LayoutRight, DeviceType> bvec;
|
||||
t_sna_1d ulist_r, ulist_i;
|
||||
t_sna_1d ylist_r, ylist_i;
|
||||
|
||||
// derivatives of data
|
||||
Kokkos::View<double*[3], Kokkos::LayoutRight, DeviceType> dbvec;
|
||||
t_sna_4d duarray_r, duarray_i;
|
||||
t_sna_4d dbarray;
|
||||
t_sna_2d dulist_r, dulist_i;
|
||||
t_sna_2d dblist;
|
||||
|
||||
private:
|
||||
double rmin0, rfac0;
|
||||
|
||||
//use indexlist instead of loops, constructor generates these
|
||||
// Same accross all SNAKokkos
|
||||
Kokkos::View<SNAKK_LOOPINDICES*, DeviceType> idxj,idxj_full;
|
||||
int idxj_max,idxj_full_max;
|
||||
// Same across all SNAKokkos
|
||||
Kokkos::View<SNAKK_ZINDICES*, DeviceType> idxz;
|
||||
Kokkos::View<SNAKK_BINDICES*, DeviceType> idxb;
|
||||
int idxcg_max, idxu_max, idxz_max, idxb_max;
|
||||
Kokkos::View<int***, DeviceType> idxcg_block;
|
||||
Kokkos::View<int*, DeviceType> idxu_block;
|
||||
Kokkos::View<int***, DeviceType> idxz_block;
|
||||
Kokkos::View<int***, DeviceType> idxb_block;
|
||||
|
||||
// data for bispectrum coefficients
|
||||
|
||||
// Same accross all SNAKokkos
|
||||
t_sna_5d cgarray;
|
||||
t_sna_1d cglist;
|
||||
t_sna_2d rootpqarray;
|
||||
|
||||
|
||||
static const int nmaxfactorial = 167;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static const double nfac_table[];
|
||||
inline
|
||||
double factorial(int);
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user