diff --git a/src/KOKKOS/mliap_data_kokkos.cpp b/src/KOKKOS/mliap_data_kokkos.cpp index e04eae32cd..30db786f7d 100644 --- a/src/KOKKOS/mliap_data_kokkos.cpp +++ b/src/KOKKOS/mliap_data_kokkos.cpp @@ -70,6 +70,7 @@ void MLIAPDataKokkos::generate_neighdata(class NeighList *list_in, i list = list_in; // grow nmax gradforce array if necessary + if (atom->nmax > nmax) { nmax = atom->nmax; memoryKK->destroy_kokkos(k_gradforce,gradforce); @@ -77,10 +78,12 @@ void MLIAPDataKokkos::generate_neighdata(class NeighList *list_in, i } // clear gradforce array + auto d_gradforce = k_gradforce.template view(); Kokkos::deep_copy(d_gradforce, 0.); // grow arrays if necessary + nlistatoms = list->inum; if (nlistatoms_max < nlistatoms) { memoryKK->destroy_kokkos(k_betas,betas); @@ -113,6 +116,7 @@ void MLIAPDataKokkos::generate_neighdata(class NeighList *list_in, i grow_neigharrays(); // Use the ielems memory for prefix scan and set it at the end to the i type + auto d_iatoms = k_iatoms.template view(); auto d_ielems = k_ielems.template view(); auto d_ij = k_ij.template view(); @@ -179,7 +183,9 @@ void MLIAPDataKokkos::generate_neighdata(class NeighList *list_in, i template void MLIAPDataKokkos::grow_neigharrays() { AtomKokkos *atomKK = (AtomKokkos *) atom; + // grow neighbor arrays if necessary + if (natomneigh_max < nlistatoms) { natomneigh_max = nlistatoms; diff --git a/src/KOKKOS/mliap_descriptor_kokkos.h b/src/KOKKOS/mliap_descriptor_kokkos.h index 9d639f7cb8..cb02a81648 100644 --- a/src/KOKKOS/mliap_descriptor_kokkos.h +++ b/src/KOKKOS/mliap_descriptor_kokkos.h @@ -44,8 +44,6 @@ template class MLIAPDescriptorKokkos : virtual protected Poin virtual ~MLIAPDescriptorKokkos() { - //memoryKK->destroy_kokkos(k_coeffelem); - //model->coeffelem = nullptr; memoryKK->destroy_kokkos(k_wjelem); } diff --git a/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp b/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp index 03b1cbfef2..177ddff80e 100644 --- a/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp +++ b/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp @@ -183,24 +183,23 @@ void MLIAPDescriptorSO3Kokkos::v_tally(int vflag_either, int vflag_g template void MLIAPDescriptorSO3Kokkos::compute_force_gradients(class MLIAPData *data_) { - error->all(FLERR,"This has not been tested in cuda/kokkos"); auto data = static_cast*>(data_); int npairs = data->nij_total; so3ptr_kokkos->spectrum_dxdr(data->nlistatoms, data->k_numneighs, data->k_jelems, this->k_wjelem, data->k_rij, data->k_ij, nmax, lmax, rcutfac, alpha, npairs, data->ndescriptors); - auto d_dplist_r=so3ptr_kokkos->k_dplist_r; + auto d_dplist_r = so3ptr_kokkos->k_dplist_r; auto d_gradforce = data->k_gradforce.template view(); - auto d_gamma= data->k_gamma.template view(); + auto d_gamma = data->k_gamma.template view(); auto d_gamma_row_index = data->k_gamma_row_index.template view(); auto d_gamma_col_index = data->k_gamma_col_index.template view(); - auto d_jatoms= data->k_jatoms.template view(); - auto d_ij= data->k_ij.template view(); + auto d_jatoms = data->k_jatoms.template view(); + auto d_ij = data->k_ij.template view(); auto d_numneighs = data->k_numneighs.template view(); auto d_iatoms = data->k_iatoms.template view(); - auto yoffset=data->yoffset, zoffset=data->zoffset, gamma_nnz=data->gamma_nnz; + auto yoffset = data->yoffset, zoffset = data->zoffset, gamma_nnz = data->gamma_nnz; Kokkos::parallel_for (data->nlistatoms, KOKKOS_LAMBDA (int ii) { const int i = d_iatoms(ii); @@ -253,7 +252,6 @@ void MLIAPDescriptorSO3Kokkos::compute_descriptor_gradients(class ML template void MLIAPDescriptorSO3Kokkos::init() { - //MLIAPDescriptorSO3::init(); so3ptr_kokkos->init(); MLIAPDescriptorKokkos::init_data(); } diff --git a/src/KOKKOS/mliap_model_linear_kokkos.cpp b/src/KOKKOS/mliap_model_linear_kokkos.cpp index 36317815a6..14166ab3cd 100644 --- a/src/KOKKOS/mliap_model_linear_kokkos.cpp +++ b/src/KOKKOS/mliap_model_linear_kokkos.cpp @@ -41,7 +41,7 @@ void MLIAPModelLinearKokkos::compute_gradients(class MLIAPData *data // read but never changes auto d_coeffelem = this->k_coeffelem.template view(); - //read + // read auto d_ielems = k_data->k_ielems.template view(); auto d_descriptors = k_data->k_descriptors.template view(); @@ -51,6 +51,7 @@ void MLIAPModelLinearKokkos::compute_gradients(class MLIAPData *data const auto eflag = data->eflag; const int ndescriptors=data->ndescriptors; + Kokkos::parallel_reduce(data->nlistatoms, KOKKOS_LAMBDA (int ii, double &update) { const int ielem = d_ielems(ii); @@ -70,7 +71,6 @@ void MLIAPModelLinearKokkos::compute_gradients(class MLIAPData *data d_eatoms(ii) = etmp; } }, data->energy); - } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/mliap_so3_kokkos.cpp b/src/KOKKOS/mliap_so3_kokkos.cpp index 659ad0ecf1..b02ef8b5be 100644 --- a/src/KOKKOS/mliap_so3_kokkos.cpp +++ b/src/KOKKOS/mliap_so3_kokkos.cpp @@ -13,7 +13,6 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing authors: Byungkyun Kang (University of Nevada, Las Vegas) Contributing author: Matt Bettencourt (NVIDIA) ------------------------------------------------------------------------- */ @@ -99,11 +98,11 @@ MLIAP_SO3Kokkos::~MLIAP_SO3Kokkos() memoryKK->destroy_kokkos(m_clisttot_r); memoryKK->destroy_kokkos(m_clisttot_i); - t_numneighs=int_1d(); - t_jelems=int_1d(); - t_wjelem=float_1d(); - t_rij=float_2d(); - t_ij=int_1d(); + t_numneighs = int_1d(); + t_jelems = int_1d(); + t_wjelem = float_1d(); + t_rij = float_2d(); + t_ij = int_1d(); } /* ---------------------------------------------------------------------- */ @@ -128,7 +127,8 @@ void MLIAP_SO3Kokkos::init() memoryKK->create_kokkos(m_ellm1, totali, "MLIAP_SO3Kokkos:m_ellm1"); alloc_init = 2.0 * totali * sizeof(double); using range=Kokkos::RangePolicy; - auto ellpl1=m_ellpl1, ellm1=m_ellm1; + auto ellpl1 = m_ellpl1, ellm1 = m_ellm1; + Kokkos::parallel_for(range(0,m_lmax), KOKKOS_LAMBDA (int ll) { int l=ll+1; ellpl1[l] = get_sum(0, l + 2, 1, 2); @@ -145,10 +145,10 @@ void MLIAP_SO3Kokkos::init() memoryKK->create_kokkos(m_Ylms, totali, "MLIAP_SO3Kokkos:m_Ylms"); alloc_init += 2 * totali * sizeof(double); - auto pfac=m_pfac, Ylms=m_Ylms; - auto pfac_l2=m_pfac_l2, lmax=m_lmax; + auto pfac = m_pfac, Ylms = m_Ylms; + auto pfac_l2 = m_pfac_l2, lmax = m_lmax; // Serial but just to make sure run with device memory - Kokkos::parallel_for(range(0,1), KOKKOS_LAMBDA (int ){ + Kokkos::parallel_for(range(0,1), KOKKOS_LAMBDA (int ) { int i=0; for (int l = 0; l < lmax + 2; l++) for (int m = -l; m < l + 1; m++) { @@ -156,7 +156,7 @@ void MLIAP_SO3Kokkos::init() Ylms[i] = pfac[l * pfac_l2 + m]; i += 1; } - }); + }); m_dfac_l1 = m_lmax + 1; m_dfac_l2 = m_numYlms + 1; @@ -175,10 +175,11 @@ void MLIAP_SO3Kokkos::init() memoryKK->create_kokkos(m_dfac5, totali, "MLIAP_SO3Kokkos:m_dfac5"); alloc_init += 6.0 * totali * sizeof(double); - auto dfac0=m_dfac0,dfac1=m_dfac1,dfac2=m_dfac2,dfac3=m_dfac3,dfac4=m_dfac4,dfac5=m_dfac5; - auto dfac_l2=m_dfac_l2; + auto dfac0 = m_dfac0,dfac1 = m_dfac1,dfac2 = m_dfac2,dfac3 = m_dfac3,dfac4 = m_dfac4,dfac5 = m_dfac5; + auto dfac_l2 = m_dfac_l2; + Kokkos::parallel_for(range(0,m_lmax), KOKKOS_LAMBDA (int ll) { - int l=ll+1; + int l = ll+1; for (int m = -l; m < l + 1; m++) { dfac0[l * dfac_l2 + m] = -sqrt(((l + 1.0) * (l + 1.0) - m * m) / (2.0 * l + 1.0) / (2.0 * l + 3.0)) * l; @@ -424,8 +425,8 @@ void MLIAP_SO3Kokkos::compute_pi(int nmax, int lmax, ViewType clistt for (m = -l; m < l + 1; m++) { - plist_r(indpl, i) += ( clisttot_r(n1, j) * clisttot_r(n2, j) + - clisttot_i(n1, j) * clisttot_i(n2, j)) * + plist_r(indpl, i) += (clisttot_r(n1, j) * clisttot_r(n2, j) + + clisttot_i(n1, j) * clisttot_i(n2, j)) * norm; j += 1; } @@ -481,7 +482,6 @@ template KOKKOS_INLINE_FUNCTION double MLIAP_SO3Kokkos::compute_sfac(double r, double rcut) const { - if (r > rcut) return 0.0; else @@ -493,7 +493,6 @@ template KOKKOS_INLINE_FUNCTION double MLIAP_SO3Kokkos::compute_dsfac(double r, double rcut) const { - if (r > rcut) return 0.0; else @@ -753,21 +752,21 @@ void MLIAP_SO3Kokkos::spectrum(int nlocal, DAT::tdual_int_1d numneig alloc_arrays += 2.0 * totali * sizeof(double); } - - t_numneighs=numneighs.template view(); - t_jelems=jelems.template view(); - t_wjelem=wjelem.template view(); - t_rij=rij.template view(); + t_numneighs = numneighs.template view(); + t_jelems = jelems.template view(); + t_wjelem = wjelem.template view(); + t_rij = rij.template view(); t_ij = k_ij.template view(); - t_nmax=nmax; - t_lmax=lmax; - t_rcut=rcut; - t_alpha=alpha; + t_nmax = nmax; + t_lmax = lmax; + t_rcut = rcut; + t_alpha = alpha; { Kokkos::RangePolicy range(0,nlocal); Kokkos::parallel_for(range, *this); } + { Kokkos::RangePolicy range(0,nlocal); Kokkos::parallel_for(range, *this); @@ -838,7 +837,6 @@ void MLIAP_SO3Kokkos::operator() (const MLIAP_SO3Kokkos: } } compute_pi(t_nmax, t_lmax, clisttot_r, clisttot_i, m_numYlms, m_plist_r, ii); - } /* ---------------------------------------------------------------------- */ @@ -903,6 +901,7 @@ void MLIAP_SO3Kokkos::spectrum_dxdr(int nlocal, DAT::tdual_int_1d nu Kokkos::RangePolicy range(0,nlocal); Kokkos::parallel_for(range, *this); } + { Kokkos::RangePolicy range(0,nlocal); Kokkos::parallel_for(range, *this); diff --git a/src/KOKKOS/pair_mliap_kokkos.cpp b/src/KOKKOS/pair_mliap_kokkos.cpp index 1d09e6a99d..b0eb235c72 100644 --- a/src/KOKKOS/pair_mliap_kokkos.cpp +++ b/src/KOKKOS/pair_mliap_kokkos.cpp @@ -212,7 +212,6 @@ void PairMLIAPKokkos::coeff(int narg, char **arg) { k_cutsq.modify(); k_cutsq.sync(); - // clear setflag since coeff() called once with I,J = * * int n = atom->ntypes;