Merge branch 'master' into test-updates

This commit is contained in:
Axel Kohlmeyer
2020-06-22 21:00:33 -04:00
7 changed files with 1219 additions and 543 deletions

View File

@ -30,19 +30,31 @@ PairStyle(snap/kk/host,PairSNAPKokkos<LMPHostType>)
namespace LAMMPS_NS {
// Routines for both the CPU and GPU backend
template<int NEIGHFLAG, int EVFLAG>
struct TagPairSNAPComputeForce{};
struct TagPairSNAPBeta{};
struct TagPairSNAPComputeNeigh{};
// GPU backend only
struct TagPairSNAPPreUi{};
struct TagPairSNAPComputeUi{};
struct TagPairSNAPComputeUiCPU{};
struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
struct TagPairSNAPComputeZi{};
struct TagPairSNAPBeta{};
struct TagPairSNAPComputeBi{};
struct TagPairSNAPZeroYi{};
struct TagPairSNAPTransformBi{}; // re-order blist from AoSoA to AoS
struct TagPairSNAPComputeYi{};
struct TagPairSNAPTransformYi{}; // re-order ylist from AoSoA to AoS
struct TagPairSNAPComputeFusedDeidrj{};
// CPU backend only
struct TagPairSNAPPreUiCPU{};
struct TagPairSNAPComputeUiCPU{};
struct TagPairSNAPComputeZiCPU{};
struct TagPairSNAPBetaCPU{};
struct TagPairSNAPComputeBiCPU{};
struct TagPairSNAPZeroYiCPU{};
struct TagPairSNAPComputeYiCPU{};
struct TagPairSNAPComputeDuidrjCPU{};
struct TagPairSNAPComputeDeidrjCPU{};
@ -81,6 +93,10 @@ public:
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPBetaCPU,const int& ii) const;
// GPU backend only
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUi>::member_type& team) const;
@ -88,32 +104,54 @@ public:
void operator() (TagPairSNAPComputeUi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUi>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUiCPU>::member_type& team) const;
void operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeZi,const int& ii) const;
void operator() (TagPairSNAPComputeZi,const int iatom_mod, const int idxz, const int iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeBi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBi>::member_type& team) const;
void operator() (TagPairSNAPBeta, const int& ii) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPZeroYi,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPZeroYi>::member_type& team) const;
void operator() (TagPairSNAPComputeBi,const int iatom_mod, const int idxb, const int iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYi,const int& ii) const;
void operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYi,const int iatom_mod, const int idxz, const int iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPTransformYi,const int iatom_mod, const int idxu, const int iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeFusedDeidrj,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeFusedDeidrj>::member_type& team) const;
// CPU backend only
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUiCPU>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUiCPU>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeZiCPU,const int& ii) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBiCPU>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPZeroYiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPZeroYiCPU>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYiCPU,const int& ii) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDuidrjCPU>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDeidrjCPU>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPBeta,const int& ii) const;
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j,
@ -175,6 +213,7 @@ inline double dist2(double* x,double* y);
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list
Kokkos::View<F_FLOAT**, DeviceType> d_beta; // betas for all atoms in list
Kokkos::View<F_FLOAT***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
Kokkos::View<F_FLOAT**, DeviceType> d_bispectrum; // bispectrum components for all atoms in list
typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;

View File

@ -194,6 +194,8 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
if (beta_max < inum) {
beta_max = inum;
d_beta = Kokkos::View<F_FLOAT**, DeviceType>("PairSNAPKokkos:beta",ncoeff,inum);
if (!host_flag)
d_beta_pack = Kokkos::View<F_FLOAT***, Kokkos::LayoutLeft, DeviceType>("PairSNAPKokkos:beta_pack",32,ncoeff,(inum+32-1)/32);
d_ninside = Kokkos::View<int*, DeviceType>("PairSNAPKokkos:ninside",inum);
}
@ -222,32 +224,93 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for("ComputeNeigh",policy_neigh,*this);
}
//PreUi
if (host_flag)
{
int vector_length = vector_length_default;
int team_size = team_size_default;
if (!host_flag)
vector_length = 32;
check_team_size_for<TagPairSNAPPreUi>(chunk_size,team_size,vector_length);
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUi> policy_preui((chunk_size+team_size-1)/team_size,team_size,vector_length);
Kokkos::parallel_for("PreUi",policy_preui,*this);
}
// Host codepath
// ComputeUI
{
int vector_length = vector_length_default;
int team_size = team_size_default;
if (host_flag) { // CPU
// Run a fused calculation of ulist and accumulation into ulisttot using atomics
//PreUi
{
int vector_length = vector_length_default;
int team_size = team_size_default;
check_team_size_for<TagPairSNAPPreUiCPU>(chunk_size,team_size,vector_length);
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUiCPU> policy_preui_cpu((chunk_size+team_size-1)/team_size,team_size,vector_length);
Kokkos::parallel_for("PreUiCPU",policy_preui_cpu,*this);
}
// ComputeUi
{
int vector_length = vector_length_default;
int team_size = team_size_default;
// Fused calculation of ulist and accumulation into ulisttot using atomics
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiCPU> policy_ui_cpu(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length);
Kokkos::parallel_for("ComputeUiCPU",policy_ui_cpu,*this);
}
} else { // GPU, vector parallelism, shared memory, separate ulist and ulisttot to avoid atomics
//Compute bispectrum
if (quadraticflag || eflag) {
//ComputeZi
int idxz_max = snaKK.idxz_max;
typename Kokkos::RangePolicy<DeviceType,TagPairSNAPComputeZiCPU> policy_zi_cpu(0,chunk_size*idxz_max);
Kokkos::parallel_for("ComputeZiCPU",policy_zi_cpu,*this);
vector_length = 32;
team_size = 4; // need to cap b/c of shared memory reqs
//ComputeBi
int vector_length = vector_length_default;
int team_size = team_size_default;
check_team_size_for<TagPairSNAPComputeBiCPU>(chunk_size,team_size,vector_length);
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBiCPU> policy_bi_cpu(chunk_size,team_size,vector_length);
Kokkos::parallel_for("ComputeBiCPU",policy_bi_cpu,*this);
}
//ZeroYi,ComputeYi
{
int vector_length = vector_length_default;
int team_size = team_size_default;
//Compute beta = dE_i/dB_i for all i in list
typename Kokkos::RangePolicy<DeviceType,TagPairSNAPBetaCPU> policy_beta(0,chunk_size);
Kokkos::parallel_for("ComputeBetaCPU",policy_beta,*this);
//ZeroYi
check_team_size_for<TagPairSNAPZeroYiCPU>(chunk_size,team_size,vector_length);
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPZeroYiCPU> policy_zero_yi(((idxu_max+team_size-1)/team_size)*chunk_size,team_size,vector_length);
Kokkos::parallel_for("ZeroYiCPU",policy_zero_yi,*this);
//ComputeYi
int idxz_max = snaKK.idxz_max;
typename Kokkos::RangePolicy<DeviceType,TagPairSNAPComputeYiCPU> policy_yi_cpu(0,chunk_size*idxz_max);
Kokkos::parallel_for("ComputeYiCPU",policy_yi_cpu,*this);
} // host flag
//ComputeDuidrj and Deidrj
{
int team_size = team_size_default;
int vector_length = vector_length_default;
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDuidrjCPU> policy_duidrj_cpu(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length);
snaKK.set_dir(-1); // technically doesn't do anything
Kokkos::parallel_for("ComputeDuidrjCPU",policy_duidrj_cpu,*this);
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDeidrjCPU> policy_deidrj_cpu(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length);
Kokkos::parallel_for("ComputeDeidrjCPU",policy_deidrj_cpu,*this);
}
} else { // GPU
#ifdef KOKKOS_ENABLE_CUDA
//PreUi
{
int vector_length = vector_length_default;
int team_size = team_size_default;
check_team_size_for<TagPairSNAPPreUi>(chunk_size,team_size,vector_length);
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUi> policy_preui((chunk_size+team_size-1)/team_size,team_size,vector_length);
Kokkos::parallel_for("PreUi",policy_preui,*this);
}
// ComputeUi w/vector parallelism, shared memory, direct atomicAdd into ulisttot
{
int vector_length = 32;
int team_size = 4; // need to cap b/c of shared memory reqs
check_team_size_for<TagPairSNAPComputeUi>(chunk_size,team_size,vector_length);
// scratch size: 2 * team_size * (twojmax+1)^2, to cover all `m1`,`m2` values
@ -265,62 +328,54 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for("ComputeUi",policy_ui,*this);
//Transform data layout of ulisttot to AoSoA, zero ylist
typename Kokkos::MDRangePolicy<DeviceType, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, TagPairSNAPTransformUi> policy_transform_ui({0,0,0},{32,idxu_max,(chunk_size + 32 - 1) / 32},{32,4,1});
Kokkos::parallel_for("TransformUi",policy_transform_ui,*this);
}
}
//Compute bispectrum in AoSoA data layout, transform Bi
if (quadraticflag || eflag) {
//ComputeZi
int idxz_max = snaKK.idxz_max;
typename Kokkos::MDRangePolicy<DeviceType, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, TagPairSNAPComputeZi> policy_compute_zi({0,0,0},{32,idxz_max,(chunk_size + 32 - 1) / 32},{32,4,1});
Kokkos::parallel_for("ComputeZi",policy_compute_zi,*this);
//Compute bispectrum
if (quadraticflag || eflag) {
//ComputeZi
int idxz_max = snaKK.idxz_max;
typename Kokkos::RangePolicy<DeviceType,TagPairSNAPComputeZi> policy_zi(0,chunk_size*idxz_max);
Kokkos::parallel_for("ComputeZi",policy_zi,*this);
//ComputeBi
int idxb_max = snaKK.idxb_max;
typename Kokkos::MDRangePolicy<DeviceType, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, TagPairSNAPComputeBi> policy_compute_bi({0,0,0},{32,idxb_max,(chunk_size + 32 - 1) / 32},{32,4,1});
Kokkos::parallel_for("ComputeBi",policy_compute_bi,*this);
//ComputeBi
int vector_length = vector_length_default;
int team_size = team_size_default;
check_team_size_for<TagPairSNAPComputeBi>(chunk_size,team_size,vector_length);
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBi> policy_bi(chunk_size,team_size,vector_length);
Kokkos::parallel_for("ComputeBi",policy_bi,*this);
}
//Transform data layout of blist out of AoSoA
//We need this b/c `blist` gets used in ComputeForce which doesn't
//take advantage of AoSoA (which at best would only be beneficial
//on the margins)
typename Kokkos::MDRangePolicy<DeviceType, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, TagPairSNAPTransformBi> policy_transform_bi({0,0,0},{32,idxb_max,(chunk_size + 32 - 1) / 32},{32,4,1});
Kokkos::parallel_for("TransformBi",policy_transform_bi,*this);
}
//Compute beta = dE_i/dB_i for all i in list
typename Kokkos::RangePolicy<DeviceType,TagPairSNAPBeta> policy_beta(0,chunk_size);
Kokkos::parallel_for("ComputeBeta",policy_beta,*this);
//ComputeYi in AoSoA data layout, transform to AoS for ComputeFusedDeidrj
//Note zeroing `ylist` is fused into `TransformUi`.
{
//Compute beta = dE_i/dB_i for all i in list
typename Kokkos::RangePolicy<DeviceType,TagPairSNAPBeta> policy_beta(0,chunk_size);
Kokkos::parallel_for("ComputeBeta",policy_beta,*this);
//ZeroYi
{
int vector_length = vector_length_default;
int team_size = team_size_default;
if (!host_flag)
team_size = 128;
check_team_size_for<TagPairSNAPZeroYi>(chunk_size,team_size,vector_length);
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPZeroYi> policy_zero_yi(((idxu_max+team_size-1)/team_size)*chunk_size,team_size,vector_length);
Kokkos::parallel_for("ZeroYi",policy_zero_yi,*this);
}
//ComputeYi
const int idxz_max = snaKK.idxz_max;
typename Kokkos::MDRangePolicy<DeviceType, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, TagPairSNAPComputeYi> policy_compute_yi({0,0,0},{32,idxz_max,(chunk_size + 32 - 1) / 32},{32,4,1});
Kokkos::parallel_for("ComputeYi",policy_compute_yi,*this);
//ComputeYi
int idxz_max = snaKK.idxz_max;
typename Kokkos::RangePolicy<DeviceType,TagPairSNAPComputeYi> policy_yi(0,chunk_size*idxz_max);
Kokkos::parallel_for("ComputeYi",policy_yi,*this);
//Transform data layout of ylist out of AoSoA
typename Kokkos::MDRangePolicy<DeviceType, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, TagPairSNAPTransformYi> policy_transform_yi({0,0,0},{32,idxu_max,(chunk_size + 32 - 1) / 32},{32,4,1});
Kokkos::parallel_for("TransformYi",policy_transform_yi,*this);
//ComputeDuidrj and Deidrj
{
int team_size = team_size_default;
int vector_length = vector_length_default;
if (host_flag) { // CPU
}
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDuidrjCPU> policy_duidrj_cpu(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length);
snaKK.set_dir(-1); // technically doesn't do anything
Kokkos::parallel_for("ComputeDuidrjCPU",policy_duidrj_cpu,*this);
typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDeidrjCPU> policy_deidrj_cpu(((chunk_size+team_size-1)/team_size)*max_neighs,team_size,vector_length);
Kokkos::parallel_for("ComputeDeidrjCPU",policy_deidrj_cpu,*this);
} else { // GPU, utilize scratch memory and splitting over dimensions, fused dui and dei
vector_length = 32;
team_size = 2; // need to cap b/c of shared memory reqs
// Fused ComputeDuidrj, ComputeDeidrj
{
int vector_length = 32;
int team_size = 2; // need to cap b/c of shared memory reqs
check_team_size_for<TagPairSNAPComputeFusedDeidrj>(chunk_size,team_size,vector_length);
// scratch size: 2 * 2 * team_size * (twojmax+1)*(twojmax/2+1), to cover half `m1`,`m2` values due to symmetry
@ -341,6 +396,9 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for("ComputeFusedDeidrj",policy_fused_deidrj,*this);
}
}
#endif // KOKKOS_ENABLE_CUDA
}
//ComputeForce
@ -416,38 +474,6 @@ void PairSNAPKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
}
}
/* ---------------------------------------------------------------------- */
// Kernel for TagPairSNAPBeta: fills column `ii` of d_beta with
// beta = dE_i/dB_i for the atom at chunk-local index `ii`.
// The linear part copies coefficients 1..ncoeff of the element's row of
// d_coeffelem; with quadraticflag set, the remaining coefficients are read
// as a row-wise packed upper triangle (diagonal included) and contracted
// with the bispectrum components in blist.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBeta,const int& ii) const {
  SNAKokkos<DeviceType> my_sna = snaKK;

  // chunk-local index -> atom -> atom type -> element
  const int atom = d_ilist[ii + chunk_offset];
  const int ielem = d_map[type[atom]];

  // Unmanaged view of this element's coefficient row
  Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
    d_coeffi(d_coeffelem,ielem,Kokkos::ALL);

  // Linear contribution; entry 0 of the row is not used here
  for (int ic = 0; ic < ncoeff; ++ic) {
    d_beta(ic,ii) = d_coeffi[ic+1];
  }

  if (!quadraticflag) return;

  // Quadratic contribution: k walks the packed coefficients that follow
  // the linear ones
  int k = ncoeff+1;
  for (int ic = 0; ic < ncoeff; ++ic) {
    const double b_i = my_sna.blist(ic,ii);
    // diagonal term
    d_beta(ic,ii) += d_coeffi[k]*b_i;
    ++k;
    // off-diagonal terms feed both the (ic) and (jc) entries
    for (int jc = ic+1; jc < ncoeff; ++jc, ++k) {
      const double b_j = my_sna.blist(jc,ii);
      d_beta(ic,ii) += d_coeffi[k]*b_j;
      d_beta(jc,ii) += d_coeffi[k]*b_i;
    }
  }
}
/* ----------------------------------------------------------------------
allocate all arrays
@ -520,6 +546,10 @@ void PairSNAPKokkos<DeviceType>::coeff(int narg, char **arg)
snaKK.init();
}
/* ----------------------------------------------------------------------
Begin routines that are called on both CPU and GPU codepaths
------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- */
template<class DeviceType>
@ -594,6 +624,53 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeNeigh,const typen
});
}
/* ----------------------------------------------------------------------
Begin routines that are unique to the GPU codepath. These take advantage
of AoSoA data layouts and scratch memory for recursive polynomials
------------------------------------------------------------------------- */
// Kernel for TagPairSNAPBeta (GPU codepath): computes beta = dE_i/dB_i for
// the atom at chunk-local index `ii` and stores it in d_beta_pack, which is
// indexed as (ii % 32, icoeff, ii / 32).
// The linear part copies coefficients 1..ncoeff of the element's row of
// d_coeffelem; with quadraticflag set, the remaining coefficients are read
// as a row-wise packed upper triangle (diagonal included) and contracted
// with blist, whose flat coefficient index is split into
// (index % idxb_max, index / idxb_max).
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBeta,const int& ii) const {

  if (ii >= chunk_size) return;

  // Position of this atom inside the packed layout
  const int lane = ii % 32;
  const int pack = ii / 32;

  // chunk-local index -> atom -> atom type -> element
  const int atom = d_ilist[ii + chunk_offset];
  const int ielem = d_map[type[atom]];
  SNAKokkos<DeviceType> my_sna = snaKK;

  // Unmanaged view of this element's coefficient row
  Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
    d_coeffi(d_coeffelem,ielem,Kokkos::ALL);

  // Linear contribution; entry 0 of the row is not used here
  for (int ic = 0; ic < ncoeff; ++ic)
    d_beta_pack(lane,ic,pack) = d_coeffi[ic+1];

  if (!quadraticflag) return;

  const int nb = my_sna.idxb_max;
  // k walks the packed quadratic coefficients that follow the linear ones
  int k = ncoeff+1;
  for (int ic = 0; ic < ncoeff; ++ic) {
    const double b_i = my_sna.blist(ic % nb, ic / nb, ii);
    // diagonal term
    d_beta_pack(lane,ic,pack) += d_coeffi[k]*b_i;
    ++k;
    // off-diagonal terms feed both the (ic) and (jc) entries
    for (int jc = ic+1; jc < ncoeff; ++jc, ++k) {
      const double b_j = my_sna.blist(jc % nb, jc / nb, ii);
      d_beta_pack(lane,ic,pack) += d_coeffi[k]*b_j;
      d_beta_pack(lane,jc,pack) += d_coeffi[k]*b_i;
    }
  }
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPPreUi,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUi>::member_type& team) const {
@ -627,61 +704,107 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeUi,const typename
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiCPU>::member_type& team) const {
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const {
SNAKokkos<DeviceType> my_sna = snaKK;
// Extract the atom number
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
if (ii >= chunk_size) return;
const int iatom = iatom_mod + iatom_div * 32;
if (iatom >= chunk_size) return;
// Extract the neighbor number
const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size());
const int ninside = d_ninside(ii);
if (jj >= ninside) return;
if (idxu >= my_sna.idxu_max) return;
int elem_count = chemflag ? nelements : 1;
for (int ielem = 0; ielem < elem_count; ielem++) {
const auto utot_re = my_sna.ulisttot_re(idxu, ielem, iatom);
const auto utot_im = my_sna.ulisttot_im(idxu, ielem, iatom);
my_sna.ulisttot_pack(iatom_mod, idxu, ielem, iatom_div) = { utot_re, utot_im };
my_sna.ylist_pack_re(iatom_mod, idxu, ielem, iatom_div) = 0.;
my_sna.ylist_pack_im(iatom_mod, idxu, ielem, iatom_div) = 0.;
}
my_sna.compute_ui_cpu(team,ii,jj);
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPZeroYi,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPZeroYi>::member_type& team) const {
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeYi,const int iatom_mod, const int jjz, const int iatom_div) const {
SNAKokkos<DeviceType> my_sna = snaKK;
// Extract the quantum number
const int idx = team.team_rank() + team.team_size() * (team.league_rank() % ((my_sna.idxu_max+team.team_size()-1)/team.team_size()));
if (idx >= my_sna.idxu_max) return;
const int iatom = iatom_mod + iatom_div * 32;
if (iatom >= chunk_size) return;
// Extract the atomic index
const int ii = team.league_rank() / ((my_sna.idxu_max+team.team_size()-1)/team.team_size());
if (ii >= chunk_size) return;
if (jjz >= my_sna.idxz_max) return;
if (chemflag)
for(int ielem = 0; ielem < nelements; ielem++)
my_sna.zero_yi(idx,ii,ielem);
else
my_sna.zero_yi(idx,ii,0);
my_sna.compute_yi(iatom_mod,jjz,iatom_div,d_beta_pack);
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeYi,const int& ii) const {
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPTransformYi,const int iatom_mod, const int idxu, const int iatom_div) const {
SNAKokkos<DeviceType> my_sna = snaKK;
my_sna.compute_yi(ii,d_beta);
const int iatom = iatom_mod + iatom_div * 32;
if (iatom >= chunk_size) return;
if (idxu >= my_sna.idxu_max) return;
int elem_count = chemflag ? nelements : 1;
for (int ielem = 0; ielem < elem_count; ielem++) {
const auto y_re = my_sna.ylist_pack_re(iatom_mod, idxu, ielem, iatom_div);
const auto y_im = my_sna.ylist_pack_im(iatom_mod, idxu, ielem, iatom_div);
my_sna.ylist(idxu, ielem, iatom) = { y_re, y_im };
}
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeZi,const int& ii) const {
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const {
SNAKokkos<DeviceType> my_sna = snaKK;
my_sna.compute_zi(ii);
const int iatom = iatom_mod + iatom_div * 32;
if (iatom >= chunk_size) return;
if (jjz >= my_sna.idxz_max) return;
my_sna.compute_zi(iatom_mod,jjz,iatom_div);
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeBi,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBi>::member_type& team) const {
int ii = team.league_rank();
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const {
SNAKokkos<DeviceType> my_sna = snaKK;
my_sna.compute_bi(team,ii);
const int iatom = iatom_mod + iatom_div * 32;
if (iatom >= chunk_size) return;
if (jjb >= my_sna.idxb_max) return;
my_sna.compute_bi(iatom_mod,jjb,iatom_div);
}
// Kernel for TagPairSNAPTransformBi: copies the bispectrum components of one
// (atom, idxb) pair from blist_pack, indexed (iatom_mod, idxb, itriple,
// iatom_div), into blist, indexed (idxb, itriple, iatom), where
// iatom = iatom_mod + 32 * iatom_div. Out-of-range work items do nothing.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const {
  SNAKokkos<DeviceType> my_sna = snaKK;

  // Rebuild the chunk-local atom index from its packed coordinates
  const int iatom = iatom_div * 32 + iatom_mod;

  // The launch range may overshoot both dimensions; skip the excess
  if (iatom >= chunk_size || idxb >= my_sna.idxb_max) return;

  const int ntriples = my_sna.ntriples;
  for (int it = 0; it < ntriples; ++it) {
    my_sna.blist(idxb, it, iatom) = my_sna.blist_pack(iatom_mod, idxb, it, iatom_div);
  }
}
template<class DeviceType>
@ -701,6 +824,126 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeFusedDeidrj,const
my_sna.compute_fused_deidrj(team,ii,jj);
}
/* ----------------------------------------------------------------------
Begin routines that are unique to the CPU codepath. These do not take
advantage of AoSoA data layouts, but that could be a good point of
future optimization and unification with the above kernels. It's unlikely
that scratch memory optimizations will ever be useful for the CPU due to
different arithmetic intensity requirements for the CPU vs GPU.
------------------------------------------------------------------------- */
// Kernel for TagPairSNAPBetaCPU: fills column `ii` of d_beta with
// beta = dE_i/dB_i for the atom at chunk-local index `ii`.
// The linear part copies coefficients 1..ncoeff of the element's row of
// d_coeffelem; with quadraticflag set, the remaining coefficients are read
// as a row-wise packed upper triangle (diagonal included) and contracted
// with blist, whose flat coefficient index is split into
// (index % idxb_max, index / idxb_max).
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPBetaCPU,const int& ii) const {

  // chunk-local index -> atom -> atom type -> element
  const int atom = d_ilist[ii + chunk_offset];
  const int ielem = d_map[type[atom]];
  SNAKokkos<DeviceType> my_sna = snaKK;

  // Unmanaged view of this element's coefficient row
  Kokkos::View<double*,Kokkos::LayoutRight,DeviceType,Kokkos::MemoryTraits<Kokkos::Unmanaged>>
    d_coeffi(d_coeffelem,ielem,Kokkos::ALL);

  // Linear contribution; entry 0 of the row is not used here
  for (int ic = 0; ic < ncoeff; ++ic) {
    d_beta(ic,ii) = d_coeffi[ic+1];
  }

  if (!quadraticflag) return;

  const int nb = my_sna.idxb_max;
  // k walks the packed quadratic coefficients that follow the linear ones
  int k = ncoeff+1;
  for (int ic = 0; ic < ncoeff; ++ic) {
    const double b_i = my_sna.blist(ic % nb,ic / nb,ii);
    // diagonal term
    d_beta(ic,ii) += d_coeffi[k]*b_i;
    ++k;
    // off-diagonal terms feed both the (ic) and (jc) entries
    for (int jc = ic+1; jc < ncoeff; ++jc, ++k) {
      const double b_j = my_sna.blist(jc % nb,jc / nb,ii);
      d_beta(ic,ii) += d_coeffi[k]*b_j;
      d_beta(jc,ii) += d_coeffi[k]*b_i;
    }
  }
}
// Kernel for TagPairSNAPPreUiCPU: each team thread handles one chunk-local
// atom index and forwards it, together with the atom's element, to
// SNAKokkos::pre_ui_cpu. Threads whose index falls past chunk_size exit.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUiCPU>::member_type& team) const {
  SNAKokkos<DeviceType> my_sna = snaKK;

  // One atom per thread: consecutive leagues cover consecutive blocks of atoms
  const int ii = team.league_rank() * team.team_size() + team.team_rank();
  if (ii >= chunk_size) return;

  // NOTE(review): the type is looked up with the chunk-local index directly,
  // whereas the Beta kernels use type[d_ilist[ii + chunk_offset]] -- confirm
  // which indexing is intended here.
  const int ielem = d_map[type(ii)];

  my_sna.pre_ui_cpu(team,ii,ielem);
}
// Kernel for TagPairSNAPComputeUiCPU: decodes an (atom, neighbor) pair from
// the team's league rank and thread rank and calls SNAKokkos::compute_ui_cpu
// for it. The league index is split as
//   league_rank % nblocks -> block of atoms,
//   league_rank / nblocks -> neighbor number,
// with nblocks = ceil(chunk_size / team_size).
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiCPU>::member_type& team) const {
  SNAKokkos<DeviceType> my_sna = snaKK;

  const int nthreads = team.team_size();
  const int nblocks = (chunk_size + nthreads - 1) / nthreads;
  const int league = team.league_rank();

  // Atom handled by this thread
  const int ii = team.team_rank() + nthreads * (league % nblocks);
  if (ii >= chunk_size) return;

  // Neighbor handled by this team; skip slots beyond the atom's neighbor count
  const int jj = league / nblocks;
  if (jj >= d_ninside(ii)) return;

  my_sna.compute_ui_cpu(team,ii,jj);
}
// Kernel for TagPairSNAPZeroYiCPU: decodes an (idxu, atom) pair from the
// team's league rank and thread rank and calls SNAKokkos::zero_yi_cpu for it,
// once per element when chemflag is set and once with element 0 otherwise.
// The league index is split as
//   league_rank % nblocks -> block of idxu values,
//   league_rank / nblocks -> atom,
// with nblocks = ceil(idxu_max / team_size).
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPZeroYiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPZeroYiCPU>::member_type& team) const {
  SNAKokkos<DeviceType> my_sna = snaKK;

  const int nthreads = team.team_size();
  const int nblocks = (my_sna.idxu_max + nthreads - 1) / nthreads;
  const int league = team.league_rank();

  // idxu entry handled by this thread
  const int idx = team.team_rank() + nthreads * (league % nblocks);
  if (idx >= my_sna.idxu_max) return;

  // Atom handled by this team
  const int ii = league / nblocks;
  if (ii >= chunk_size) return;

  const int elem_count = chemflag ? nelements : 1;
  for (int ielem = 0; ielem < elem_count; ++ielem) {
    my_sna.zero_yi_cpu(idx,ii,ielem);
  }
}
// Kernel for TagPairSNAPComputeYiCPU: forwards the flat work index `ii` and
// the d_beta array to SNAKokkos::compute_yi_cpu on a local copy of snaKK.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeYiCPU,const int& ii) const {
  // operator() is const, so the call goes through a local copy
  auto sna_local = snaKK;
  sna_local.compute_yi_cpu(ii,d_beta);
}
// Kernel for TagPairSNAPComputeZiCPU: forwards the flat work index `ii` to
// SNAKokkos::compute_zi_cpu on a local copy of snaKK.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeZiCPU,const int& ii) const {
  // operator() is const, so the call goes through a local copy
  auto sna_local = snaKK;
  sna_local.compute_zi_cpu(ii);
}
// Kernel for TagPairSNAPComputeBiCPU: one team per atom. The league rank is
// used as the chunk-local atom index passed to SNAKokkos::compute_bi_cpu.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBiCPU>::member_type& team) const {
  // operator() is const, so the call goes through a local copy
  auto sna_local = snaKK;
  const int atom_index = team.league_rank();
  sna_local.compute_bi_cpu(team,atom_index);
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDuidrjCPU>::member_type& team) const {
@ -735,6 +978,12 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeDeidrjCPU,const t
my_sna.compute_deidrj_cpu(team,ii,jj);
}
/* ----------------------------------------------------------------------
Also used for both CPU and GPU codepaths. Could maybe benefit from a
separate GPU/CPU codepath, but this kernel takes so little time it's
likely not worth it.
------------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
@ -799,20 +1048,31 @@ void PairSNAPKokkos<DeviceType>::operator() (TagPairSNAPComputeForce<NEIGHFLAG,E
// E = beta.B + 0.5*B^t.alpha.B
const auto idxb_max = snaKK.idxb_max;
// linear contributions
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
evdwl += d_coeffi[icoeff+1]*my_sna.blist(icoeff,ii);
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
const auto idxb = icoeff % idxb_max;
const auto idx_chem = icoeff / idxb_max;
evdwl += d_coeffi[icoeff+1]*my_sna.blist(idxb,idx_chem,ii);
}
// quadratic contributions
if (quadraticflag) {
int k = ncoeff+1;
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
double bveci = my_sna.blist(icoeff,ii);
const auto idxb = icoeff % idxb_max;
const auto idx_chem = icoeff / idxb_max;
double bveci = my_sna.blist(idxb,idx_chem,ii);
evdwl += 0.5*d_coeffi[k++]*bveci*bveci;
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
double bvecj = my_sna.blist(jcoeff,ii);
auto jdxb = jcoeff % idxb_max;
auto jdx_chem = jcoeff / idxb_max;
double bvecj = my_sna.blist(jdxb,jdx_chem,ii);
evdwl += d_coeffi[k++]*bveci*bvecj;
}
}

View File

@ -36,7 +36,9 @@ public:
typedef Kokkos::View<double**, DeviceType> t_sna_2d;
typedef Kokkos::View<double**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
typedef Kokkos::View<double***, DeviceType> t_sna_3d;
typedef Kokkos::View<double***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
typedef Kokkos::View<double***[3], DeviceType> t_sna_4d;
typedef Kokkos::View<double****, Kokkos::LayoutLeft, DeviceType> t_sna_4d_ll;
typedef Kokkos::View<double**[3], DeviceType> t_sna_3d3;
typedef Kokkos::View<double*****, DeviceType> t_sna_5d;
@ -48,7 +50,8 @@ public:
typedef Kokkos::View<SNAcomplex***, DeviceType> t_sna_3c;
typedef Kokkos::View<SNAcomplex***, Kokkos::LayoutLeft, DeviceType> t_sna_3c_ll;
typedef Kokkos::View<SNAcomplex***[3], DeviceType> t_sna_4c;
typedef Kokkos::View<SNAcomplex***[3], Kokkos::LayoutLeft, DeviceType> t_sna_4c_ll;
typedef Kokkos::View<SNAcomplex***[3], Kokkos::LayoutLeft, DeviceType> t_sna_4c3_ll;
typedef Kokkos::View<SNAcomplex****, Kokkos::LayoutLeft, DeviceType> t_sna_4c_ll;
typedef Kokkos::View<SNAcomplex**[3], DeviceType> t_sna_3c3;
typedef Kokkos::View<SNAcomplex*****, DeviceType> t_sna_5c;
@ -73,27 +76,39 @@ inline
int ncoeff;
// functions for bispectrum coefficients
// functions for bispectrum coefficients, GPU only
KOKKOS_INLINE_FUNCTION
void pre_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&, int); // ForceSNAP
void pre_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_ui(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_zi(const int&, const int&, const int&); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_yi(int,int,int,
const Kokkos::View<F_FLOAT***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_bi(const int&, const int&, const int&); // ForceSNAP
// functions for bispectrum coefficients, CPU only
KOKKOS_INLINE_FUNCTION
void pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_zi(const int&); // ForceSNAP
void compute_zi_cpu(const int&); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void zero_yi(const int&, const int&, int); // ForceSNAP
void zero_yi_cpu(const int&,const int&,const int&); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_yi(int,
void compute_yi_cpu(int,
const Kokkos::View<F_FLOAT**, DeviceType> &beta); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_bi(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
// functions for derivatives
KOKKOS_INLINE_FUNCTION
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
// functions for derivatives, GPU only
KOKKOS_INLINE_FUNCTION
void compute_fused_deidrj(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int); //ForceSNAP
// functions for derivatives, CPU only
KOKKOS_INLINE_FUNCTION
void compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); //ForceSNAP
KOKKOS_INLINE_FUNCTION
@ -139,18 +154,32 @@ inline
int twojmax, diagonalstyle;
t_sna_2d_ll blist;
t_sna_2c_ll ulisttot;
t_sna_2c_ll zlist;
t_sna_3d_ll blist;
t_sna_3c_ll ulisttot;
t_sna_3c_ll zlist;
t_sna_3c_ll ulist;
t_sna_2c_ll ylist;
t_sna_3c_ll ylist;
// derivatives of data
t_sna_4c_ll dulist;
t_sna_4c3_ll dulist;
// Modified structures for GPU backend
t_sna_3d_ll ulisttot_re; // split real,
t_sna_3d_ll ulisttot_im; // imag
t_sna_4c_ll ulisttot_pack; // AoSoA layout
t_sna_4c_ll zlist_pack; // AoSoA layout
t_sna_4d_ll blist_pack;
t_sna_4d_ll ylist_pack_re; // split real,
t_sna_4d_ll ylist_pack_im; // imag AoSoA layout
int idxcg_max, idxu_max, idxz_max, idxb_max;
// Chem snap counts
int nelements;
int ndoubles;
int ntriples;
private:
double rmin0, rfac0;
@ -212,9 +241,6 @@ inline
// Chem snap flags
int chem_flag;
int bnorm_flag;
int nelements;
int ndoubles;
int ntriples;
// Self-weight
double wself;

File diff suppressed because it is too large Load Diff

View File

@ -362,6 +362,7 @@ void MSM::setup()
nmax_direct = 8*(nxhi_direct+1)*(nyhi_direct+1)*(nzhi_direct+1);
deallocate();
if (peratom_allocate_flag) deallocate_peratom();
// compute direct sum interaction weights
@ -612,8 +613,6 @@ void MSM::compute(int eflag, int vflag)
void MSM::allocate()
{
deallocate();
// interpolation coeffs
order_allocated = order;
@ -635,9 +634,9 @@ void MSM::allocate()
// allocate memory for each grid level
for (int n=0; n<levels; n++) {
memory->create3d_offset(qgrid[n],nzlo_out[n],nzhi_out[n],
nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:qgrid");
memory->create3d_offset(egrid[n],nzlo_out[n],nzhi_out[n],
nylo_out[n],nyhi_out[n],nxlo_out[n],nxhi_out[n],"msm:egrid");
@ -660,23 +659,29 @@ void MSM::allocate()
void MSM::deallocate()
{
delete cg_all;
cg_all = nullptr;
memory->destroy2d_offset(phi1d,-order_allocated);
memory->destroy2d_offset(dphi1d,-order_allocated);
if (cg_all) delete cg_all;
cg_all = nullptr;
for (int n=0; n<levels; n++) {
memory->destroy3d_offset(qgrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
memory->destroy3d_offset(egrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (qgrid[n])
memory->destroy3d_offset(qgrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
if (world_levels[n] != MPI_COMM_NULL)
MPI_Comm_free(&world_levels[n]);
world_levels[n] = MPI_COMM_NULL;
active_flag[n] = 0;
if (egrid[n])
memory->destroy3d_offset(egrid[n],nzlo_out[n],nylo_out[n],nxlo_out[n]);
delete cg[n];
cg[n] = nullptr;
if (world_levels)
if (world_levels[n] != MPI_COMM_NULL)
MPI_Comm_free(&world_levels[n]);
if (cg) {
if (cg[n]) {
delete cg[n];
cg[n] = nullptr;
}
}
}
}
@ -765,7 +770,6 @@ void MSM::deallocate_peratom()
void MSM::allocate_levels()
{
deallocate_levels();
ngrid = new int[levels];
cg = new GridComm*[levels];
@ -815,21 +819,21 @@ void MSM::allocate_levels()
v5grid = new double***[levels];
for (int n=0; n<levels; n++) {
cg[n] = nullptr;
cg[n] = NULL;
world_levels[n] = MPI_COMM_NULL;
active_flag[n] = 0;
cg_peratom[n] = nullptr;
cg_peratom[n] = NULL;
qgrid[n] = nullptr;
egrid[n] = nullptr;
qgrid[n] = NULL;
egrid[n] = NULL;
v0grid[n] = nullptr;
v1grid[n] = nullptr;
v2grid[n] = nullptr;
v3grid[n] = nullptr;
v4grid[n] = nullptr;
v5grid[n] = nullptr;
v0grid[n] = NULL;
v1grid[n] = NULL;
v2grid[n] = NULL;
v3grid[n] = NULL;
v4grid[n] = NULL;
v5grid[n] = NULL;
}
}
/* ----------------------------------------------------------------------
@ -1105,6 +1109,7 @@ void MSM::set_grid_global()
if (!domain->nonperiodic) levels -= 1;
deallocate_levels();
allocate_levels();
// find number of grid levels in each direction

View File

@ -33,6 +33,8 @@ using namespace MathConst;
#define DELTA 16384
#define DELTA_BONUS 8192
int AtomVec::num_atom_vecs = 0;
/* ---------------------------------------------------------------------- */
AtomVec::AtomVec(LAMMPS *lmp) : Pointers(lmp)
@ -54,6 +56,8 @@ AtomVec::AtomVec(LAMMPS *lmp) : Pointers(lmp)
threads = NULL;
++num_atom_vecs;
// peratom variables auto-included in corresponding child style fields string
// these fields cannot be specified in the fields string
@ -93,44 +97,48 @@ AtomVec::~AtomVec()
int datatype,cols;
void *pdata;
--num_atom_vecs;
for (int i = 0; i < nargcopy; i++) delete [] argcopy[i];
delete [] argcopy;
memory->destroy(atom->tag);
memory->destroy(atom->type);
memory->destroy(atom->mask);
memory->destroy(atom->image);
memory->destroy(atom->x);
memory->destroy(atom->v);
memory->destroy(atom->f);
if (num_atom_vecs == 0) {
memory->destroy(atom->tag);
memory->destroy(atom->type);
memory->destroy(atom->mask);
memory->destroy(atom->image);
memory->destroy(atom->x);
memory->destroy(atom->v);
memory->destroy(atom->f);
for (int i = 0; i < ngrow; i++) {
pdata = mgrow.pdata[i];
datatype = mgrow.datatype[i];
cols = mgrow.cols[i];
if (datatype == Atom::DOUBLE) {
if (cols == 0)
memory->destroy(*((double **) pdata));
else if (cols > 0)
memory->destroy(*((double ***) pdata));
else {
memory->destroy(*((double ***) pdata));
}
} else if (datatype == Atom::INT) {
if (cols == 0)
memory->destroy(*((int **) pdata));
else if (cols > 0)
memory->destroy(*((int ***) pdata));
else {
memory->destroy(*((int ***) pdata));
}
} else if (datatype == Atom::BIGINT) {
if (cols == 0)
memory->destroy(*((bigint **) pdata));
else if (cols > 0)
memory->destroy(*((bigint ***) pdata));
else {
memory->destroy(*((bigint ***) pdata));
for (int i = 0; i < ngrow; i++) {
pdata = mgrow.pdata[i];
datatype = mgrow.datatype[i];
cols = mgrow.cols[i];
if (datatype == Atom::DOUBLE) {
if (cols == 0)
memory->destroy(*((double **) pdata));
else if (cols > 0)
memory->destroy(*((double ***) pdata));
else {
memory->destroy(*((double ***) pdata));
}
} else if (datatype == Atom::INT) {
if (cols == 0)
memory->destroy(*((int **) pdata));
else if (cols > 0)
memory->destroy(*((int ***) pdata));
else {
memory->destroy(*((int ***) pdata));
}
} else if (datatype == Atom::BIGINT) {
if (cols == 0)
memory->destroy(*((bigint **) pdata));
else if (cols > 0)
memory->destroy(*((bigint ***) pdata));
else {
memory->destroy(*((bigint ***) pdata));
}
}
}
}

View File

@ -209,6 +209,10 @@ class AtomVec : protected Pointers {
bool *threads;
// counter for atom vec instances
static int num_atom_vecs;
// local methods
void grow_nmax();