From 806af5322e2493d8c4a8ca725c574805bf093d84 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 5 May 2023 19:08:02 -0600 Subject: [PATCH 1/6] Kokkos package optimizations --- src/KOKKOS/angle_cosine_kokkos.cpp | 22 +- src/KOKKOS/atom_kokkos.h | 16 +- src/KOKKOS/atom_map_kokkos.cpp | 346 ++++++++++++++++------------- src/KOKKOS/atom_vec_kokkos.cpp | 2 +- src/KOKKOS/bond_fene_kokkos.cpp | 60 +++-- src/KOKKOS/bond_fene_kokkos.h | 9 +- src/KOKKOS/comm_kokkos.cpp | 9 +- src/KOKKOS/fix_langevin_kokkos.cpp | 10 +- src/KOKKOS/fix_shake_kokkos.cpp | 3 + src/KOKKOS/kokkos_type.h | 51 +++++ src/KOKKOS/neigh_bond_kokkos.cpp | 128 +++-------- src/KOKKOS/neigh_bond_kokkos.h | 5 +- src/KOKKOS/pair_kokkos.h | 18 +- src/atom.h | 6 +- src/fix_langevin.cpp | 4 +- 15 files changed, 362 insertions(+), 327 deletions(-) diff --git a/src/KOKKOS/angle_cosine_kokkos.cpp b/src/KOKKOS/angle_cosine_kokkos.cpp index 24d1b5caea..189a156866 100644 --- a/src/KOKKOS/angle_cosine_kokkos.cpp +++ b/src/KOKKOS/angle_cosine_kokkos.cpp @@ -150,20 +150,26 @@ void AngleCosineKokkos::operator()(TagAngleCosineCompute::operator()(TagAngleCosineCompute KOKKOS_INLINE_FUNCTION static int map_kokkos(tagint global, int map_style, const DAT::tdual_int_1d &k_map_array, const dual_hash_type &k_map_hash) diff --git a/src/KOKKOS/atom_map_kokkos.cpp b/src/KOKKOS/atom_map_kokkos.cpp index f8cc5ab2c4..b30f0eabc0 100644 --- a/src/KOKKOS/atom_map_kokkos.cpp +++ b/src/KOKKOS/atom_map_kokkos.cpp @@ -21,6 +21,8 @@ #include "modify.h" #include "neighbor_kokkos.h" +#include + #include using namespace LAMMPS_NS; @@ -34,8 +36,6 @@ using namespace LAMMPS_NS; set entire array to -1 as initial values for hash option: map_nhash = length of hash table - map_nbucket = # of hash buckets, prime larger than map_nhash * 2 - so buckets will only be filled with 0 or 1 atoms on average ------------------------------------------------------------------------- */ void AtomKokkos::map_init(int check) @@ -58,15 +58,7 @@ void AtomKokkos::map_init(int check) // for hash, set all buckets to empty, put all entries in free list if (!recreate) { - if (map_style == MAP_ARRAY) { - for (int i = 0; i <= map_tag_max; i++) map_array[i] = -1; - } else { - for (int i = 0; i < map_nbucket; i++) map_bucket[i] = -1; - map_nused = 0; - map_free = 0; - for (int i = 0; i < map_nhash; i++) map_hash[i].next = i + 1; - if (map_nhash > 0) map_hash[map_nhash - 1].next = -1; - } + map_clear(); // recreating: delete old map and create new one for array or hash @@ -76,7 +68,8 @@ void AtomKokkos::map_init(int check) if (map_style == MAP_ARRAY) { map_maxarray = map_tag_max; memoryKK->create_kokkos(k_map_array, map_array, map_maxarray + 1, "atom:map_array"); - for (int i = 0; i <= map_tag_max; i++) map_array[i] = -1; + Kokkos::deep_copy(k_map_array.d_view,-1); + k_map_array.modify_device(); } else { @@ -90,35 +83,26 @@ void AtomKokkos::map_init(int check) map_nhash *= 2; map_nhash = MAX(map_nhash, 1000); - // map_nbucket = prime just larger than map_nhash - // next_prime() should be fast enough, - // about 10% of odd integers are prime above 1M - - map_nbucket = next_prime(map_nhash); - - // set all buckets to empty - // set hash to map_nhash in length - // put all hash entries in free list and point them to each other - - map_bucket = new int[map_nbucket]; - for (int i = 0; i < map_nbucket; i++) map_bucket[i] = -1; - - map_hash = new HashElem[map_nhash]; - map_nused = 0; - map_free = 0; - for (int i = 0; i < map_nhash; i++) map_hash[i].next = i + 1; - map_hash[map_nhash - 1].next = -1; - - // use "view" template method to avoid unnecessary deep_copy - - auto h_map_hash = k_map_hash.view(); // get type - h_map_hash = decltype(h_map_hash)(map_nhash); - k_map_hash.view() = h_map_hash; + k_map_hash = dual_hash_type(map_nhash); } } +} - k_sametag.modify_host(); - if (map_style == Atom::MAP_ARRAY) k_map_array.modify_host(); +/* ---------------------------------------------------------------------- + clear global -> local map for all of my own and ghost atoms + for hash table option: + global ID may not be in table if image atom was already cleared +------------------------------------------------------------------------- */ + +void AtomKokkos::map_clear() +{ + if (map_style == Atom::MAP_ARRAY) { + Kokkos::deep_copy(k_map_array.d_view,-1); + k_map_array.modify_device(); + } else { + k_map_hash.d_view.clear(); + k_map_hash.modify_device(); + } } /* ---------------------------------------------------------------------- @@ -135,28 +119,16 @@ void AtomKokkos::map_set() { int nall = nlocal + nghost; - atomKK->sync(Host, TAG_MASK); + // possible reallocation of sametag must come before loop over atoms + // since loop sets sametag - k_sametag.sync_host(); - if (map_style == Atom::MAP_ARRAY) k_map_array.sync_host(); + if (nall > max_same) { + max_same = nall + EXTRA; + memoryKK->destroy_kokkos(k_sametag, sametag); + memoryKK->create_kokkos(k_sametag, sametag, max_same, "atom:sametag"); + } - if (map_style == MAP_ARRAY) { - - // possible reallocation of sametag must come before loop over atoms - // since loop sets sametag - - if (nall > max_same) { - max_same = nall + EXTRA; - memoryKK->destroy_kokkos(k_sametag, sametag); - memoryKK->create_kokkos(k_sametag, sametag, max_same, "atom:sametag"); - } - - for (int i = nall - 1; i >= 0; i--) { - sametag[i] = map_array[tag[i]]; - map_array[tag[i]] = i; - } - - } else { + if (map_style == MAP_HASH) { // if this proc has more atoms than hash table size, call map_init() // call with 0 since max atomID in system has not changed @@ -164,109 +136,179 @@ void AtomKokkos::map_set() // b/c map_init() may invoke map_delete(), whacking sametag if (nall > map_nhash) map_init(0); - if (nall > max_same) { - max_same = nall + EXTRA; - memoryKK->destroy_kokkos(k_sametag, sametag); - memoryKK->create_kokkos(k_sametag, sametag, max_same, "atom:sametag"); - } - - int previous, ibucket, index; - tagint global; - - for (int i = nall - 1; i >= 0; i--) { - sametag[i] = map_find_hash(tag[i]); - - // search for key - // if found it, just overwrite local value with index - - previous = -1; - global = tag[i]; - ibucket = global % map_nbucket; - index = map_bucket[ibucket]; - while (index > -1) { - if (map_hash[index].global == global) break; - previous = index; - index = map_hash[index].next; - } - if (index > -1) { - map_hash[index].local = i; - continue; - } - - // take one entry from free list - // add the new global/local pair as entry at end of bucket list - // special logic if this entry is 1st in bucket - - index = map_free; - map_free = map_hash[map_free].next; - if (previous == -1) - map_bucket[ibucket] = index; - else - map_hash[previous].next = index; - map_hash[index].global = global; - map_hash[index].local = i; - map_hash[index].next = -1; - map_nused++; - } - - // Copy to Kokkos hash - - // use "view" template method to avoid unnecessary deep_copy - - auto h_map_hash = k_map_hash.view(); - h_map_hash.clear(); - - for (int i = nall - 1; i >= 0; i--) { - - // search for key - // if don't find it, done - - previous = -1; - global = tag[i]; - ibucket = global % map_nbucket; - index = map_bucket[ibucket]; - while (index > -1) { - if (map_hash[index].global == global) break; - previous = index; - index = map_hash[index].next; - } - if (index == -1) continue; - - int local = map_hash[index].local; - - auto insert_result = h_map_hash.insert(global, local); - if (insert_result.failed()) error->one(FLERR, "Kokkos::UnorderedMap insertion failed"); - } } - k_sametag.modify_host(); - if (map_style == Atom::MAP_ARRAY) - k_map_array.modify_host(); - else if (map_style == Atom::MAP_HASH) { + atomKK->sync(Device, TAG_MASK); - // use "view" template method to avoid unnecessary deep_copy + auto d_tag = atomKK->k_tag.d_view; + auto d_sametag = k_sametag.d_view; - auto h_map_hash = k_map_hash.view(); - auto d_map_hash = k_map_hash.view(); + // sort by tag - // check if fix shake or neigh bond needs a device hash + auto d_tag_sorted = DAT::t_tagint_1d(Kokkos::NoInit("atom:tag_sorted"),nall); + auto d_i_sorted = DAT::t_int_1d(Kokkos::NoInit("atom:i_sorted"),nall); - int device_hash_flag = 0; + typedef Kokkos::DualView tdual_tagint_2; + typedef tdual_tagint_2::t_dev t_tagint_2; + typedef tdual_tagint_2::t_host t_host_tagint_2; - auto neighborKK = (NeighborKokkos *) neighbor; - if (neighborKK->device_flag) device_hash_flag = 1; + auto d_tag_min_max = t_tagint_2(Kokkos::NoInit("atom:tag_min_max")); + auto h_tag_min_max = t_host_tagint_2(Kokkos::NoInit("atom:tag_min_max")); - for (int n = 0; n < modify->nfix; n++) - if (utils::strmatch(modify->fix[n]->style, "^shake")) - if (modify->fix[n]->execution_space == Device) device_hash_flag = 1; + auto d_tag_min = Kokkos::subview(d_tag_min_max,0); + auto d_tag_max = Kokkos::subview(d_tag_min_max,1); - if (device_hash_flag) { - Kokkos::deep_copy(d_map_hash, h_map_hash); - k_map_hash.view() = d_map_hash; + auto h_tag_min = Kokkos::subview(h_tag_min_max,0); + auto h_tag_max = Kokkos::subview(h_tag_min_max,1); + + h_tag_min() = MAXTAGINT; + h_tag_max() = 0; + + Kokkos::deep_copy(d_tag_min_max,h_tag_min_max); + + Kokkos::parallel_for(nall, LAMMPS_LAMBDA(int i) { + d_i_sorted(i) = i; + tagint tag_i = d_tag(i); + d_tag_sorted(i) = tag_i; + Kokkos::atomic_min(&d_tag_min(),tag_i); + Kokkos::atomic_max(&d_tag_max(),tag_i); + }); + + Kokkos::deep_copy(h_tag_min_max,d_tag_min_max); + + tagint min = h_tag_min(); + tagint max = h_tag_max(); + + using KeyViewType = decltype(d_tag_sorted); + using BinOp = Kokkos::BinOp1D; + + BinOp binner(nall, min, max); + + Kokkos::BinSort Sorter(d_tag_sorted, 0, nall, binner, true); + Sorter.create_permute_vector(LMPDeviceType()); + Sorter.sort(LMPDeviceType(), d_tag_sorted, 0, nall); + Sorter.sort(LMPDeviceType(), d_i_sorted, 0, nall); + + auto d_map_array = k_map_array.d_view; + auto d_map_hash = k_map_hash.d_view; + d_map_hash.clear(); + + auto d_error_flag = k_error_flag.d_view; + Kokkos::deep_copy(d_error_flag,0); + + // for each tag find: + // neighboring atoms with closest local id for sametag + // atom with smallest local id for atom map + + Kokkos::parallel_for(nall, LAMMPS_LAMBDA(int ii) { + const int i = d_i_sorted(ii); + const tagint tag_i = d_tag_sorted(ii); + + int i_min = i; + int i_closest = MAXTAGINT; + + // search atoms with same tag in the forward direction + + int jj = ii+1; + int closest_flag = 0; + + while (jj < nall) { + const tagint tag_j = d_tag_sorted(jj); + if (tag_j != tag_i) break; + const int j = d_i_sorted(jj); + i_min = MIN(i_min,j); + if (j > i) { + i_closest = MIN(i_closest,j); + closest_flag = 1; + } + jj++; } + + // search atoms with same tag in the reverse direction + + jj = ii-1; + + while (jj >= 0) { + const tagint tag_j = d_tag_sorted(jj); + if (tag_j != tag_i) break; + const int j = d_i_sorted(jj); + i_min = MIN(i_min,j); + if (j > i) { + i_closest = MIN(i_closest,j); + closest_flag = 1; + } + jj--; + } + + if (!closest_flag) + i_closest = -1; + + d_sametag(i) = i_closest; + + if (i == i_min) { + if (map_style == MAP_ARRAY) + d_map_array(tag_i) = i_min; + else { + auto insert_result = d_map_hash.insert(tag_i, i_min); + if (insert_result.failed()) d_error_flag() = 1; + } + } + + }); + + auto h_error_flag = Kokkos::create_mirror_view_and_copy(LMPHostType(),d_error_flag); + if (h_error_flag()) + error->one(FLERR,"Failed to insert into Kokkos hash atom map"); + + k_sametag.modify_device(); + + if (map_style == MAP_ARRAY) + k_map_array.modify_device(); + else + k_map_hash.modify_device(); +} + +/* ---------------------------------------------------------------------- + set global to local map for one atom + for hash table option: + global ID may already be in table if atom was already set + called by Special class +------------------------------------------------------------------------- */ + +void AtomKokkos::map_one(tagint global, int local) +{ + if (map_style == MAP_ARRAY) { + k_map_array.sync_host(); + k_map_array.h_view[global] = local; + } else { + k_map_hash.sync_host(); + auto& h_map_hash = k_map_hash.h_view; + + auto insert_result = h_map_hash.insert(global, local); + if (insert_result.existing()) + h_map_hash.value_at(h_map_hash.find(global)) = local; + else if (insert_result.failed()) + error->one(FLERR,"Failed to insert into Kokkos hash atom map"); } } +/* ---------------------------------------------------------------------- + lookup global ID in hash table, return local index + called by map() in atom.h +------------------------------------------------------------------------- */ + +int AtomKokkos::map_find_hash(tagint global) +{ + k_map_hash.sync_host(); + auto& h_map_hash = k_map_hash.h_view; + + int local = -1; + auto index = h_map_hash.find(global); + if (h_map_hash.valid_at(index)) + local = h_map_hash.value_at(index); + return local; +} + /* ---------------------------------------------------------------------- free the array or hash table for global to local mapping ------------------------------------------------------------------------- */ @@ -279,10 +321,6 @@ void AtomKokkos::map_delete() if (map_style == MAP_ARRAY) { memoryKK->destroy_kokkos(k_map_array, map_array); map_array = nullptr; - } else { - k_map_hash.h_view = host_hash_type(); - k_map_hash.d_view = hash_type(); - } - - Atom::map_delete(); + } else + k_map_hash = dual_hash_type(); } diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index b23222e684..93393e9e09 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -35,7 +35,7 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp) unpack_exchange_indices_flag = 0; size_exchange = 0; - k_count = DAT::tdual_int_1d("atom::k_count",1); + k_count = DAT::tdual_int_1d("atom:k_count",1); atomKK = (AtomKokkos *) atom; commKK = (CommKokkos *) comm; } diff --git a/src/KOKKOS/bond_fene_kokkos.cpp b/src/KOKKOS/bond_fene_kokkos.cpp index 0a294bde36..a6bbb1edde 100644 --- a/src/KOKKOS/bond_fene_kokkos.cpp +++ b/src/KOKKOS/bond_fene_kokkos.cpp @@ -43,13 +43,8 @@ BondFENEKokkos::BondFENEKokkos(LAMMPS *lmp) : BondFENE(lmp) datamask_read = X_MASK | F_MASK | ENERGY_MASK | VIRIAL_MASK; datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; - k_warning_flag = DAT::tdual_int_scalar("Bond:warning_flag"); - d_warning_flag = k_warning_flag.view(); - h_warning_flag = k_warning_flag.h_view; - - k_error_flag = DAT::tdual_int_scalar("Bond:error_flag"); - d_error_flag = k_error_flag.view(); - h_error_flag = k_error_flag.h_view; + d_flag = typename AT::t_int_scalar("bond:flag"); + h_flag = HAT::t_int_scalar("bond:flag_mirror"); } /* ---------------------------------------------------------------------- */ @@ -99,13 +94,7 @@ void BondFENEKokkos::compute(int eflag_in, int vflag_in) nlocal = atom->nlocal; newton_bond = force->newton_bond; - h_warning_flag() = 0; - k_warning_flag.template modify(); - k_warning_flag.template sync(); - - h_error_flag() = 0; - k_error_flag.template modify(); - k_error_flag.template sync(); + Kokkos::deep_copy(d_flag,0); copymode = 1; @@ -127,14 +116,11 @@ void BondFENEKokkos::compute(int eflag_in, int vflag_in) } } - k_warning_flag.template modify(); - k_warning_flag.template sync(); - if (h_warning_flag()) - error->warning(FLERR,"FENE bond too long"); + Kokkos::deep_copy(h_flag,d_flag); - k_error_flag.template modify(); - k_error_flag.template sync(); - if (h_error_flag()) + if (h_flag() == 1) + error->warning(FLERR,"FENE bond too long"); + else if (h_flag() == 2) error->one(FLERR,"Bad FENE bond"); if (eflag_global) energy += ev.evdwl; @@ -165,8 +151,6 @@ template KOKKOS_INLINE_FUNCTION void BondFENEKokkos::operator()(TagBondFENECompute, const int &n, EV_FLOAT& ev) const { - if (d_error_flag()) return; - // The f array is atomic Kokkos::View::value,Kokkos::MemoryTraits > a_f = f; @@ -178,10 +162,15 @@ void BondFENEKokkos::operator()(TagBondFENECompute r0, then rlogarg < 0.0 which is an error @@ -189,31 +178,32 @@ void BondFENEKokkos::operator()(TagBondFENECompute 2*r0 something serious is wrong, abort if (rlogarg < 0.1) { - if (!d_warning_flag()) - d_warning_flag() = 1; - if (rlogarg <= -3.0 && !d_error_flag()) - d_error_flag() = 1; + if (rlogarg <= -3.0) + d_flag() = 2; + else + d_flag() = 1; rlogarg = 0.1; } - F_FLOAT fbond = -d_k[type]/rlogarg; + F_FLOAT fbond = -k/rlogarg; // force from LJ term F_FLOAT sr6 = 0.0; - if (rsq < MY_CUBEROOT2*d_sigma[type]*d_sigma[type]) { - const F_FLOAT sr2 = d_sigma[type]*d_sigma[type]/rsq; + F_FLOAT sigma2 = sigma*sigma; + if (rsq < MY_CUBEROOT2*sigma2) { + const F_FLOAT sr2 = sigma2/rsq; sr6 = sr2*sr2*sr2; - fbond += 48.0*d_epsilon[type]*sr6*(sr6-0.5)/rsq; + fbond += 48.0*epsilon*sr6*(sr6-0.5)/rsq; } // energy F_FLOAT ebond = 0.0; if (eflag) { - ebond = -0.5 * d_k[type]*r0sq*log(rlogarg); - if (rsq < MY_CUBEROOT2*d_sigma[type]*d_sigma[type]) - ebond += 4.0*d_epsilon[type]*sr6*(sr6-1.0) + d_epsilon[type]; + ebond = -0.5 * k*r0sq*log(rlogarg); + if (rsq < MY_CUBEROOT2*sigma2) + ebond += 4.0*epsilon*sr6*(sr6-1.0) + epsilon; } // apply force to each of 2 atoms diff --git a/src/KOKKOS/bond_fene_kokkos.h b/src/KOKKOS/bond_fene_kokkos.h index e2ba64a114..18f8d87b6a 100644 --- a/src/KOKKOS/bond_fene_kokkos.h +++ b/src/KOKKOS/bond_fene_kokkos.h @@ -71,13 +71,8 @@ class BondFENEKokkos : public BondFENE { typename ArrayTypes::t_efloat_1d d_eatom; typename ArrayTypes::t_virial_array d_vatom; - DAT::tdual_int_scalar k_warning_flag; - typename AT::t_int_scalar d_warning_flag; - HAT::t_int_scalar h_warning_flag; - - DAT::tdual_int_scalar k_error_flag; - typename AT::t_int_scalar d_error_flag; - HAT::t_int_scalar h_error_flag; + typename AT::t_int_scalar d_flag; + HAT::t_int_scalar h_flag; int nlocal,newton_bond; int eflag,vflag; diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 3687216bf9..7d007a666c 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -729,13 +729,8 @@ void CommKokkos::exchange_device() double lo,hi; MPI_Request request; - // clear global->local map for owned and ghost atoms - // b/c atoms migrate to new procs in exchange() and - // new ghosts are created in borders() - // map_set() is done at end of borders() // clear ghost count and any ghost bonus data internal to AtomVec - if (map_style != Atom::MAP_NONE) atom->map_clear(); atom->nghost = 0; atom->avec->clear_bonus(); @@ -1275,10 +1270,8 @@ void CommKokkos::borders_device() { // reset global->local map - if (map_style != Atom::MAP_NONE) { - atomKK->sync(Host,TAG_MASK); + if (map_style != Atom::MAP_NONE) atom->map_set(); - } } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp index 4d7a3e8820..437dd9daef 100644 --- a/src/KOKKOS/fix_langevin_kokkos.cpp +++ b/src/KOKKOS/fix_langevin_kokkos.cpp @@ -684,8 +684,6 @@ void FixLangevinKokkos::zero_force_item(int i) const template void FixLangevinKokkos::compute_target() { - atomKK->sync(Host, MASK_MASK); - mask = atomKK->k_mask.template view(); int nlocal = atomKK->nlocal; double delta = update->ntimestep - update->beginstep; @@ -710,12 +708,14 @@ void FixLangevinKokkos::compute_target() memoryKK->destroy_kokkos(k_tforce,tforce); memoryKK->create_kokkos(k_tforce,tforce,maxatom2,"langevin:tforce"); d_tforce = k_tforce.template view(); - h_tforce = k_tforce.template view(); + h_tforce = k_tforce.h_view; } input->variable->compute_atom(tvar,igroup,tforce,1,0); // tforce is modified on host - k_tforce.template modify(); + k_tforce.modify_host(); + atomKK->sync(Host, MASK_MASK); + auto h_mask = atomKK->k_mask.h_view; for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) + if (h_mask[i] & groupbit) if (h_tforce[i] < 0.0) error->one(FLERR, "Fix langevin variable returned negative temperature"); diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp index 1ea3ed1c5a..3e88860622 100644 --- a/src/KOKKOS/fix_shake_kokkos.cpp +++ b/src/KOKKOS/fix_shake_kokkos.cpp @@ -221,6 +221,7 @@ void FixShakeKokkos::pre_neighbor() k_map_array.template sync(); } else if (map_style == Atom::MAP_HASH) { k_map_hash = atomKK->k_map_hash; + k_map_hash.template sync(); } k_shake_flag.sync(); @@ -248,6 +249,7 @@ void FixShakeKokkos::pre_neighbor() k_map_array.template sync(); } else if (map_style == Atom::MAP_HASH) { k_map_hash = atomKK->k_map_hash; + k_map_hash.template sync(); } atomKK->k_sametag.sync(); @@ -357,6 +359,7 @@ void FixShakeKokkos::post_force(int vflag) k_map_array.template sync(); } else if (map_style == Atom::MAP_HASH) { k_map_hash = atomKK->k_map_hash; + k_map_hash.template sync(); } if (d_rmass.data()) diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 555c7fa9ae..f8d2b4947a 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -570,6 +570,21 @@ struct dual_hash_type { hash_type d_view; host_hash_type h_view; + bool modified_device; + bool modified_host; + + dual_hash_type() { + modified_device = modified_host = false; + d_view = hash_type(); + h_view = host_hash_type(); + } + + dual_hash_type(int capacity) { + modified_device = modified_host = false; + d_view = hash_type(capacity); + h_view = host_hash_type(capacity); + } + template std::enable_if_t<(std::is_same::value || Kokkos::SpaceAccessibility::accessible),hash_type&> view() {return d_view;} @@ -584,6 +599,42 @@ struct dual_hash_type { KOKKOS_INLINE_FUNCTION std::enable_if_t::value || Kokkos::SpaceAccessibility::accessible),const host_hash_type&> const_view() const {return h_view;} + void modify_device() + { + modified_device = true; + if (modified_device && modified_host) + Kokkos::abort("Concurrent modification of host and device hashes"); + } + + void modify_host() + { + modified_host = true; + if (modified_device && modified_host) + Kokkos::abort("Concurrent modification of host and device hashes"); + } + + void sync_device() + { + if (modified_host) { + Kokkos::deep_copy(d_view,h_view); + modified_host = false; + } + } + + void sync_host() + { + if (modified_device) { + Kokkos::deep_copy(h_view,d_view); + modified_device = false; + } + } + + template + std::enable_if_t<(std::is_same::value || Kokkos::SpaceAccessibility::accessible),void> sync() {sync_device();} + + template + std::enable_if_t::value || Kokkos::SpaceAccessibility::accessible),void> sync() {sync_host();} + }; template diff --git a/src/KOKKOS/neigh_bond_kokkos.cpp b/src/KOKKOS/neigh_bond_kokkos.cpp index 5a250b6d23..9067284426 100644 --- a/src/KOKKOS/neigh_bond_kokkos.cpp +++ b/src/KOKKOS/neigh_bond_kokkos.cpp @@ -50,13 +50,16 @@ NeighBondKokkos::NeighBondKokkos(LAMMPS *lmp) : Pointers(lmp) datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; - k_nlist = DAT::tdual_int_scalar("NeighBond:nlist"); - d_nlist = k_nlist.view(); - h_nlist = k_nlist.h_view; + // use 1D view for scalars to reduce GPU memory operations - k_fail_flag = DAT::tdual_int_scalar("NeighBond:fail_flag"); - d_fail_flag = k_fail_flag.view(); - h_fail_flag = k_fail_flag.h_view; + d_scalars = typename AT::t_int_1d("NeighBond:scalars",2); + h_scalars = HAT::t_int_1d("NeighBond:scalars_mirror",2); + + d_nlist = Kokkos::subview(d_scalars,0); + d_fail_flag = Kokkos::subview(d_scalars,1); + + h_nlist = Kokkos::subview(h_scalars,0); + h_fail_flag = Kokkos::subview(h_scalars,1); maxbond = 0; maxangle = 0; @@ -240,22 +243,14 @@ void NeighBondKokkos::bond_all() do { nmissing = 0; - h_nlist() = 0; - k_nlist.template modify(); - k_nlist.template sync(); - - h_fail_flag() = 0; - k_fail_flag.template modify(); - k_fail_flag.template sync(); + Kokkos::deep_copy(d_scalars,0); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - k_nlist.template modify(); - k_nlist.template sync(); + Kokkos::deep_copy(h_scalars,d_scalars); + neighbor->nbondlist = h_nlist(); - k_fail_flag.template modify(); - k_fail_flag.template sync(); if (h_fail_flag()) { maxbond = neighbor->nbondlist + BONDDELTA; memoryKK->grow_kokkos(k_bondlist,neighbor->bondlist,maxbond,3,"neighbor:neighbor->bondlist"); @@ -327,22 +322,14 @@ void NeighBondKokkos::bond_partial() do { nmissing = 0; - h_nlist() = 0; - k_nlist.template modify(); - k_nlist.template sync(); - - h_fail_flag() = 0; - k_fail_flag.template modify(); - k_fail_flag.template sync(); + Kokkos::deep_copy(d_scalars,0); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - k_nlist.template modify(); - k_nlist.template sync(); + Kokkos::deep_copy(h_scalars,d_scalars); + neighbor->nbondlist = h_nlist(); - k_fail_flag.template modify(); - k_fail_flag.template sync(); if (h_fail_flag()) { maxbond = neighbor->nbondlist + BONDDELTA; memoryKK->grow_kokkos(k_bondlist,neighbor->bondlist,maxbond,3,"neighbor:neighbor->bondlist"); @@ -440,22 +427,14 @@ void NeighBondKokkos::angle_all() do { nmissing = 0; - h_nlist() = 0; - k_nlist.template modify(); - k_nlist.template sync(); - - h_fail_flag() = 0; - k_fail_flag.template modify(); - k_fail_flag.template sync(); + Kokkos::deep_copy(d_scalars,0); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - k_nlist.template modify(); - k_nlist.template sync(); + Kokkos::deep_copy(h_scalars,d_scalars); + neighbor->nanglelist = h_nlist(); - k_fail_flag.template modify(); - k_fail_flag.template sync(); if (h_fail_flag()) { maxangle = neighbor->nanglelist + BONDDELTA; memoryKK->grow_kokkos(k_anglelist,neighbor->anglelist,maxangle,4,"neighbor:neighbor->anglelist"); @@ -534,22 +513,14 @@ void NeighBondKokkos::angle_partial() do { nmissing = 0; - h_nlist() = 0; - k_nlist.template modify(); - k_nlist.template sync(); - - h_fail_flag() = 0; - k_fail_flag.template modify(); - k_fail_flag.template sync(); + Kokkos::deep_copy(d_scalars,0); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - k_nlist.template modify(); - k_nlist.template sync(); + Kokkos::deep_copy(h_scalars,d_scalars); + neighbor->nanglelist = h_nlist(); - k_fail_flag.template modify(); - k_fail_flag.template sync(); if (h_fail_flag()) { maxangle = neighbor->nanglelist + BONDDELTA; memoryKK->grow_kokkos(k_anglelist,neighbor->anglelist,maxangle,4,"neighbor:neighbor->anglelist"); @@ -667,22 +638,14 @@ void NeighBondKokkos::dihedral_all() do { nmissing = 0; - h_nlist() = 0; - k_nlist.template modify(); - k_nlist.template sync(); - - h_fail_flag() = 0; - k_fail_flag.template modify(); - k_fail_flag.template sync(); + Kokkos::deep_copy(d_scalars,0); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - k_nlist.template modify(); - k_nlist.template sync(); + Kokkos::deep_copy(h_scalars,d_scalars); + neighbor->ndihedrallist = h_nlist(); - k_fail_flag.template modify(); - k_fail_flag.template sync(); if (h_fail_flag()) { maxdihedral = neighbor->ndihedrallist + BONDDELTA; memoryKK->grow_kokkos(k_dihedrallist,neighbor->dihedrallist,maxdihedral,5,"neighbor:neighbor->dihedrallist"); @@ -766,22 +729,14 @@ void NeighBondKokkos::dihedral_partial() do { nmissing = 0; - h_nlist() = 0; - k_nlist.template modify(); - k_nlist.template sync(); - - h_fail_flag() = 0; - k_fail_flag.template modify(); - k_fail_flag.template sync(); + Kokkos::deep_copy(d_scalars,0); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - k_nlist.template modify(); - k_nlist.template sync(); + Kokkos::deep_copy(h_scalars,d_scalars); + neighbor->ndihedrallist = h_nlist(); - k_fail_flag.template modify(); - k_fail_flag.template sync(); if (h_fail_flag()) { maxdihedral = neighbor->ndihedrallist + BONDDELTA; memoryKK->grow_kokkos(k_dihedrallist,neighbor->dihedrallist,maxdihedral,5,"neighbor:neighbor->dihedrallist"); @@ -921,22 +876,14 @@ void NeighBondKokkos::improper_all() do { nmissing = 0; - h_nlist() = 0; - k_nlist.template modify(); - k_nlist.template sync(); - - h_fail_flag() = 0; - k_fail_flag.template modify(); - k_fail_flag.template sync(); + Kokkos::deep_copy(d_scalars,0); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - k_nlist.template modify(); - k_nlist.template sync(); + Kokkos::deep_copy(h_scalars,d_scalars); + neighbor->nimproperlist = h_nlist(); - k_fail_flag.template modify(); - k_fail_flag.template sync(); if (h_fail_flag()) { maximproper = neighbor->nimproperlist + BONDDELTA; memoryKK->grow_kokkos(k_improperlist,neighbor->improperlist,maximproper,5,"neighbor:neighbor->improperlist"); @@ -1020,22 +967,14 @@ void NeighBondKokkos::improper_partial() do { nmissing = 0; - h_nlist() = 0; - k_nlist.template modify(); - k_nlist.template sync(); - - h_fail_flag() = 0; - k_fail_flag.template modify(); - k_fail_flag.template sync(); + Kokkos::deep_copy(d_scalars,0); Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,nmissing); - k_nlist.template modify(); - k_nlist.template sync(); + Kokkos::deep_copy(h_scalars,d_scalars); + neighbor->nimproperlist = h_nlist(); - k_fail_flag.template modify(); - k_fail_flag.template sync(); if (h_fail_flag()) { maximproper = neighbor->nimproperlist + BONDDELTA; memoryKK->grow_kokkos(k_improperlist,neighbor->improperlist,maximproper,5,"neighbor:neighbor->improperlist"); @@ -1221,6 +1160,7 @@ void NeighBondKokkos::update_class_variables() k_map_array.template sync(); } else if (map_style == Atom::MAP_HASH) { k_map_hash = atomKK->k_map_hash; + k_map_hash.template sync(); } } diff --git a/src/KOKKOS/neigh_bond_kokkos.h b/src/KOKKOS/neigh_bond_kokkos.h index e4be1aeee3..480726c602 100644 --- a/src/KOKKOS/neigh_bond_kokkos.h +++ b/src/KOKKOS/neigh_bond_kokkos.h @@ -115,11 +115,10 @@ class NeighBondKokkos : protected Pointers { typename AT::t_tagint_2d improper_atom1,improper_atom2, improper_atom3,improper_atom4; - DAT::tdual_int_scalar k_nlist; + typename AT::t_int_1d d_scalars; + HAT::t_int_1d h_scalars; typename AT::t_int_scalar d_nlist; HAT::t_int_scalar h_nlist; - - DAT::tdual_int_scalar k_fail_flag; typename AT::t_int_scalar d_fail_flag; HAT::t_int_scalar h_fail_flag; diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index c244d6a8f9..f6925a376d 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -137,6 +137,12 @@ struct PairComputeFunctor { F_FLOAT fytmp = 0.0; F_FLOAT fztmp = 0.0; + if (NEIGHFLAG == FULL) { + f(i,0) = 0.0; + f(i,1) = 0.0; + f(i,2) = 0.0; + } + for (int jj = 0; jj < jnum; jj++) { int j = neighbors_i(jj); const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; @@ -205,6 +211,12 @@ struct PairComputeFunctor { F_FLOAT fytmp = 0.0; F_FLOAT fztmp = 0.0; + if (NEIGHFLAG == FULL) { + f(i,0) = 0.0; + f(i,1) = 0.0; + f(i,2) = 0.0; + } + for (int jj = 0; jj < jnum; jj++) { int j = neighbors_i(jj); const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; @@ -767,7 +779,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename std::enable_if<(NEIG fpair->lmp->kokkos->neigh_thread = 1; if (fpair->lmp->kokkos->neigh_thread) { - fpair->fuse_force_clear_flag = 1; int vector_length = 8; int atoms_per_team = 32; @@ -805,6 +816,7 @@ template EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos* list) { EV_FLOAT ev; if (fpair->neighflag == FULL) { + fpair->fuse_force_clear_flag = 1; ev = pair_compute_neighlist (fpair,list); } else if (fpair->neighflag == HALFTHREAD) { ev = pair_compute_neighlist (fpair,list); @@ -860,11 +872,7 @@ void pair_virial_fdotr_compute(PairStyle* fpair) { fpair->virial[5] = virial.v[5]; } - - - } #endif #endif - diff --git a/src/atom.h b/src/atom.h index 810a2829ed..c8a8533e33 100644 --- a/src/atom.h +++ b/src/atom.h @@ -385,7 +385,7 @@ class Atom : protected Pointers { // map lookup function inlined for efficiency // return -1 if no map defined - inline int map(tagint global) + virtual inline int map(tagint global) { if (map_style == 1) return map_array[global]; @@ -398,10 +398,10 @@ class Atom : protected Pointers { virtual void map_init(int check = 1); virtual void map_clear(); virtual void map_set(); - void map_one(tagint, int); + virtual void map_one(tagint, int); int map_style_set(); virtual void map_delete(); - int map_find_hash(tagint); + virtual int map_find_hash(tagint); protected: // global to local ID mapping diff --git a/src/fix_langevin.cpp b/src/fix_langevin.cpp index adfa651147..0e083ce012 100644 --- a/src/fix_langevin.cpp +++ b/src/fix_langevin.cpp @@ -215,7 +215,7 @@ int FixLangevin::setmask() if (gjfflag) mask |= INITIAL_INTEGRATE; mask |= POST_FORCE; mask |= POST_FORCE_RESPA; - mask |= END_OF_STEP; + if (tallyflag || gjfflag) mask |= END_OF_STEP; return mask; } @@ -915,8 +915,6 @@ void FixLangevin::angmom_thermostat() void FixLangevin::end_of_step() { - if (!tallyflag && !gjfflag) return; - double **v = atom->v; int *mask = atom->mask; int nlocal = atom->nlocal; From 6074303f4abcbf877b017b654c8f0ef15f6e78b0 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 5 May 2023 19:13:49 -0600 Subject: [PATCH 2/6] whitespace --- src/KOKKOS/atom_map_kokkos.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/atom_map_kokkos.cpp b/src/KOKKOS/atom_map_kokkos.cpp index b30f0eabc0..5a91b34f94 100644 --- a/src/KOKKOS/atom_map_kokkos.cpp +++ b/src/KOKKOS/atom_map_kokkos.cpp @@ -194,7 +194,7 @@ void AtomKokkos::map_set() d_map_hash.clear(); auto d_error_flag = k_error_flag.d_view; - Kokkos::deep_copy(d_error_flag,0); + Kokkos::deep_copy(d_error_flag,0); // for each tag find: // neighboring atoms with closest local id for sametag @@ -253,7 +253,7 @@ void AtomKokkos::map_set() if (insert_result.failed()) d_error_flag() = 1; } } - + }); auto h_error_flag = Kokkos::create_mirror_view_and_copy(LMPHostType(),d_error_flag); @@ -262,7 +262,7 @@ void AtomKokkos::map_set() k_sametag.modify_device(); - if (map_style == MAP_ARRAY) + if (map_style == MAP_ARRAY) k_map_array.modify_device(); else k_map_hash.modify_device(); From b9256fa4925bc7e0578309477d9de5bd2420262d Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 16 May 2023 11:09:37 -0600 Subject: [PATCH 3/6] Reduce memory allocations --- src/KOKKOS/atom_kokkos.cpp | 14 +++++-- src/KOKKOS/atom_kokkos.h | 19 +++++++++ src/KOKKOS/atom_map_kokkos.cpp | 73 +++++++++++++++++++--------------- src/KOKKOS/kokkos_type.h | 13 ++++++ 4 files changed, 85 insertions(+), 34 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index feca086911..03537e7b88 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -26,16 +26,24 @@ #include "modify.h" #include "fix.h" -#include - using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp) { - k_error_flag = DAT::tdual_int_scalar("atom:error_flag"); avecKK = nullptr; + + k_error_flag = DAT::tdual_int_scalar("atom:error_flag"); + + d_tag_min_max = t_tagint_2(Kokkos::NoInit("atom:tag_min_max")); + h_tag_min_max = t_host_tagint_2(Kokkos::NoInit("atom:tag_min_max")); + + d_tag_min = Kokkos::subview(d_tag_min_max,0); + d_tag_max = Kokkos::subview(d_tag_min_max,1); + + h_tag_min = Kokkos::subview(h_tag_min_max,0); + h_tag_max = Kokkos::subview(h_tag_min_max,1); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index a994542dd9..23566cff03 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -15,6 +15,8 @@ #include "atom.h" // IWYU pragma: export #include "kokkos_type.h" +#include + #ifndef LMP_ATOM_KOKKOS_H #define LMP_ATOM_KOKKOS_H @@ -84,6 +86,23 @@ class AtomKokkos : public Atom { DAT::tdual_int_1d k_map_array; dual_hash_type k_map_hash; + DAT::t_tagint_1d d_tag_sorted; + DAT::t_int_1d d_i_sorted; + + typedef Kokkos::DualView tdual_tagint_2; + typedef tdual_tagint_2::t_dev t_tagint_2; + typedef tdual_tagint_2::t_host t_host_tagint_2; + + t_tagint_2 d_tag_min_max; + t_host_tagint_2 h_tag_min_max; + + DAT::t_tagint_scalar d_tag_min,d_tag_max; + HAT::t_tagint_scalar h_tag_min,h_tag_max; + + using MapKeyViewType = decltype(d_tag_sorted); + using BinOpMap = Kokkos::BinOp1D; + Kokkos::BinSort Sorter; + class AtomVecKokkos* avecKK; // map lookup function inlined for efficiency diff --git a/src/KOKKOS/atom_map_kokkos.cpp b/src/KOKKOS/atom_map_kokkos.cpp index 5a91b34f94..f928e3c1d5 100644 --- a/src/KOKKOS/atom_map_kokkos.cpp +++ b/src/KOKKOS/atom_map_kokkos.cpp @@ -21,8 +21,6 @@ #include "modify.h" #include "neighbor_kokkos.h" -#include - #include using namespace LAMMPS_NS; @@ -145,33 +143,31 @@ void AtomKokkos::map_set() // sort by tag - auto d_tag_sorted = DAT::t_tagint_1d(Kokkos::NoInit("atom:tag_sorted"),nall); - auto d_i_sorted = DAT::t_int_1d(Kokkos::NoInit("atom:i_sorted"),nall); + int nmax = atom->nmax; - typedef Kokkos::DualView tdual_tagint_2; - typedef tdual_tagint_2::t_dev t_tagint_2; - typedef tdual_tagint_2::t_host t_host_tagint_2; - - auto d_tag_min_max = t_tagint_2(Kokkos::NoInit("atom:tag_min_max")); - auto h_tag_min_max = t_host_tagint_2(Kokkos::NoInit("atom:tag_min_max")); - - auto d_tag_min = Kokkos::subview(d_tag_min_max,0); - auto d_tag_max = Kokkos::subview(d_tag_min_max,1); - - auto h_tag_min = Kokkos::subview(h_tag_min_max,0); - auto h_tag_max = Kokkos::subview(h_tag_min_max,1); + int realloc_flag = 0; + if (d_tag_sorted.extent(0) < nmax) { + MemKK::realloc_kokkos(d_tag_sorted,"atom:tag_sorted",nmax); + MemKK::realloc_kokkos(d_i_sorted,"atom:i_sorted",nmax); + realloc_flag = 1; + } h_tag_min() = MAXTAGINT; h_tag_max() = 0; Kokkos::deep_copy(d_tag_min_max,h_tag_min_max); + auto l_tag_sorted = d_tag_sorted; + auto l_i_sorted = d_i_sorted; + auto l_tag_min = d_tag_min; + auto l_tag_max = d_tag_max; + Kokkos::parallel_for(nall, LAMMPS_LAMBDA(int i) { - d_i_sorted(i) = i; + l_i_sorted(i) = i; tagint tag_i = d_tag(i); - d_tag_sorted(i) = tag_i; - Kokkos::atomic_min(&d_tag_min(),tag_i); - Kokkos::atomic_max(&d_tag_max(),tag_i); + l_tag_sorted(i) = tag_i; + Kokkos::atomic_min(&l_tag_min(),tag_i); + Kokkos::atomic_max(&l_tag_max(),tag_i); }); Kokkos::deep_copy(h_tag_min_max,d_tag_min_max); @@ -179,12 +175,25 @@ void AtomKokkos::map_set() tagint min = h_tag_min(); tagint max = h_tag_max(); - using KeyViewType = decltype(d_tag_sorted); - using BinOp = Kokkos::BinOp1D; + using MapKeyViewType = decltype(d_tag_sorted); + using BinOpMap = Kokkos::BinOp1D; - BinOp binner(nall, min, max); + auto binner = BinOpMap(nall, min, max); + + if (!Sorter.bin_offsets.data() || realloc_flag) { + Sorter = Kokkos::BinSort(d_tag_sorted, 0, nall, binner, true); + MemKK::realloc_kokkos(Sorter.bin_count_atomic,"Kokkos::SortImpl::BinSortFunctor::bin_count",nmax+1); + Kokkos::deep_copy(Sorter.bin_count_atomic,0); + Sorter.bin_count_const = Sorter.bin_count_atomic; + MemKK::realloc_kokkos(Sorter.bin_offsets,"Kokkos::SortImpl::BinSortFunctor::bin_offsets",nmax+1); + MemKK::realloc_kokkos(Sorter.sort_order,"Kokkos::SortImpl::BinSortFunctor::sort_order",nmax); + } else { + Kokkos::deep_copy(Sorter.bin_count_atomic,0); + Sorter.bin_op = binner; + Sorter.range_begin = 0; + Sorter.range_end = nall; + } - Kokkos::BinSort Sorter(d_tag_sorted, 0, nall, binner, true); Sorter.create_permute_vector(LMPDeviceType()); Sorter.sort(LMPDeviceType(), d_tag_sorted, 0, nall); Sorter.sort(LMPDeviceType(), d_i_sorted, 0, nall); @@ -201,8 +210,8 @@ void AtomKokkos::map_set() // atom with smallest local id for atom map Kokkos::parallel_for(nall, LAMMPS_LAMBDA(int ii) { - const int i = d_i_sorted(ii); - const tagint tag_i = d_tag_sorted(ii); + const int i = l_i_sorted(ii); + const tagint tag_i = l_tag_sorted(ii); int i_min = i; int i_closest = MAXTAGINT; @@ -213,9 +222,9 @@ void AtomKokkos::map_set() int closest_flag = 0; while (jj < nall) { - const tagint tag_j = d_tag_sorted(jj); + const tagint tag_j = l_tag_sorted(jj); if (tag_j != tag_i) break; - const int j = d_i_sorted(jj); + const int j = l_i_sorted(jj); i_min = MIN(i_min,j); if (j > i) { i_closest = MIN(i_closest,j); @@ -229,9 +238,9 @@ void AtomKokkos::map_set() jj = ii-1; while (jj >= 0) { - const tagint tag_j = d_tag_sorted(jj); + const tagint tag_j = l_tag_sorted(jj); if (tag_j != tag_i) break; - const int j = d_i_sorted(jj); + const int j = l_i_sorted(jj); i_min = MIN(i_min,j); if (j > i) { i_closest = MIN(i_closest,j); @@ -256,7 +265,9 @@ void AtomKokkos::map_set() }); - auto h_error_flag = Kokkos::create_mirror_view_and_copy(LMPHostType(),d_error_flag); + auto h_error_flag = k_error_flag.h_view; + Kokkos::deep_copy(h_error_flag,d_error_flag); + if (h_error_flag()) error->one(FLERR,"Failed to insert into Kokkos hash atom map"); diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index f8d2b4947a..3c9886905e 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -652,6 +652,13 @@ typedef tdual_int_scalar::t_dev_const t_int_scalar_const; typedef tdual_int_scalar::t_dev_um t_int_scalar_um; typedef tdual_int_scalar::t_dev_const_um t_int_scalar_const_um; +typedef Kokkos:: + DualView tdual_tagint_scalar; +typedef tdual_tagint_scalar::t_dev t_tagint_scalar; +typedef tdual_tagint_scalar::t_dev_const t_tagint_scalar_const; +typedef tdual_tagint_scalar::t_dev_um t_tagint_scalar_um; +typedef tdual_tagint_scalar::t_dev_const_um t_tagint_scalar_const_um; + typedef Kokkos:: DualView tdual_float_scalar; @@ -970,6 +977,12 @@ typedef tdual_int_scalar::t_host_const t_int_scalar_const; typedef tdual_int_scalar::t_host_um t_int_scalar_um; typedef tdual_int_scalar::t_host_const_um t_int_scalar_const_um; +typedef Kokkos::DualView tdual_tagint_scalar; +typedef tdual_tagint_scalar::t_host t_tagint_scalar; +typedef tdual_tagint_scalar::t_host_const t_tagint_scalar_const; +typedef tdual_tagint_scalar::t_host_um t_tagint_scalar_um; +typedef tdual_tagint_scalar::t_host_const_um t_tagint_scalar_const_um; + typedef Kokkos::DualView tdual_float_scalar; typedef tdual_float_scalar::t_host t_float_scalar; typedef tdual_float_scalar::t_host_const t_float_scalar_const; From b1ee177c0a01ddf50ab9ae4ebf275d486111d5d9 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 16 May 2023 12:43:19 -0600 Subject: [PATCH 4/6] Must use local variable for lambda capture --- src/KOKKOS/atom_map_kokkos.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/atom_map_kokkos.cpp b/src/KOKKOS/atom_map_kokkos.cpp index f928e3c1d5..575d87cf45 100644 --- a/src/KOKKOS/atom_map_kokkos.cpp +++ b/src/KOKKOS/atom_map_kokkos.cpp @@ -161,6 +161,7 @@ void AtomKokkos::map_set() auto l_i_sorted = d_i_sorted; auto l_tag_min = d_tag_min; auto l_tag_max = d_tag_max; + int map_style_array = (map_style == MAP_ARRAY); Kokkos::parallel_for(nall, LAMMPS_LAMBDA(int i) { l_i_sorted(i) = i; @@ -255,7 +256,7 @@ void AtomKokkos::map_set() d_sametag(i) = i_closest; if (i == i_min) { - if (map_style == MAP_ARRAY) + if (map_style_array) d_map_array(tag_i) = i_min; else { auto insert_result = d_map_hash.insert(tag_i, i_min); From 858fd4cc2c2391293c8419cb203d5b55bfb287e5 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 16 May 2023 15:23:31 -0600 Subject: [PATCH 5/6] Small optimization --- src/KOKKOS/domain_kokkos.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/domain_kokkos.cpp b/src/KOKKOS/domain_kokkos.cpp index 9478aa939d..6311d45a3f 100644 --- a/src/KOKKOS/domain_kokkos.cpp +++ b/src/KOKKOS/domain_kokkos.cpp @@ -571,9 +571,11 @@ void DomainKokkos::lamda2x(int n) KOKKOS_INLINE_FUNCTION void DomainKokkos::operator()(TagDomain_lamda2x, const int &i) const { - x(i,0) = h[0]*x(i,0) + h[5]*x(i,1) + h[4]*x(i,2) + boxlo[0]; - x(i,1) = h[1]*x(i,1) + h[3]*x(i,2) + boxlo[1]; - x(i,2) = h[2]*x(i,2) + boxlo[2]; + const double xi1 = x(i,1); + const double xi2 = x(i,2); + x(i,0) = h[0]*x(i,0) + h[5]*xi1 + h[4]*xi2 + boxlo[0]; + x(i,1) = h[1]*xi1 + h[3]*xi2 + boxlo[1]; + x(i,2) = h[2]*xi2 + boxlo[2]; } /* ---------------------------------------------------------------------- From 4f57026dc062fa03b08176c605a345835fe55672 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 16 May 2023 21:15:01 -0400 Subject: [PATCH 6/6] whitespace --- src/KOKKOS/atom_map_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/atom_map_kokkos.cpp b/src/KOKKOS/atom_map_kokkos.cpp index 575d87cf45..92786b7e37 100644 --- a/src/KOKKOS/atom_map_kokkos.cpp +++ b/src/KOKKOS/atom_map_kokkos.cpp @@ -161,7 +161,7 @@ void AtomKokkos::map_set() auto l_i_sorted = d_i_sorted; auto l_tag_min = d_tag_min; auto l_tag_max = d_tag_max; - int map_style_array = (map_style == MAP_ARRAY); + int map_style_array = (map_style == MAP_ARRAY); Kokkos::parallel_for(nall, LAMMPS_LAMBDA(int i) { l_i_sorted(i) = i;