Merge pull request #3769 from stanmoore1/kk_optimize

Kokkos package optimizations
This commit is contained in:
Axel Kohlmeyer
2023-05-17 07:13:04 -04:00
committed by GitHub
17 changed files with 426 additions and 337 deletions

View File

@ -150,20 +150,26 @@ void AngleCosineKokkos<DeviceType>::operator()(TagAngleCosineCompute<NEWTON_BOND
const int i3 = anglelist(n,2);
const int type = anglelist(n,3);
const F_FLOAT k = d_k[type];
const F_FLOAT x20 = x(i2,0);
const F_FLOAT x21 = x(i2,1);
const F_FLOAT x22 = x(i2,2);
// 1st bond
const F_FLOAT delx1 = x(i1,0) - x(i2,0);
const F_FLOAT dely1 = x(i1,1) - x(i2,1);
const F_FLOAT delz1 = x(i1,2) - x(i2,2);
const F_FLOAT delx1 = x(i1,0) - x20;
const F_FLOAT dely1 = x(i1,1) - x21;
const F_FLOAT delz1 = x(i1,2) - x22;
const F_FLOAT rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
const F_FLOAT r1 = sqrt(rsq1);
// 2nd bond
const F_FLOAT delx2 = x(i3,0) - x(i2,0);
const F_FLOAT dely2 = x(i3,1) - x(i2,1);
const F_FLOAT delz2 = x(i3,2) - x(i2,2);
const F_FLOAT delx2 = x(i3,0) - x20;
const F_FLOAT dely2 = x(i3,1) - x21;
const F_FLOAT delz2 = x(i3,2) - x22;
const F_FLOAT rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
const F_FLOAT r2 = sqrt(rsq2);
@ -178,9 +184,9 @@ void AngleCosineKokkos<DeviceType>::operator()(TagAngleCosineCompute<NEWTON_BOND
// force & energy
F_FLOAT eangle = 0.0;
if (eflag) eangle = d_k[type]*(1.0+c);
if (eflag) eangle = k*(1.0+c);
const F_FLOAT a = d_k[type];
const F_FLOAT a = k;
const F_FLOAT a11 = a*c / rsq1;
const F_FLOAT a12 = -a / (r1*r2);
const F_FLOAT a22 = a*c / rsq2;

View File

@ -26,16 +26,24 @@
#include "modify.h"
#include "fix.h"
#include <Kokkos_Sort.hpp>
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Constructor: builds on the base Atom class and sets up the Kokkos-side
// members used by the atom map code (error flag, tag min/max views).
// NOTE(review): k_error_flag is assigned twice in this span; the text looks
// like a rendered diff, so one of the two lines is presumably the removed
// version -- confirm against the real file.
AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp)
{
k_error_flag = DAT::tdual_int_scalar("atom:error_flag");
avecKK = nullptr;
k_error_flag = DAT::tdual_int_scalar("atom:error_flag");
// two-element tag views (device and host), allocated with Kokkos::NoInit
d_tag_min_max = t_tagint_2(Kokkos::NoInit("atom:tag_min_max"));
h_tag_min_max = t_host_tagint_2(Kokkos::NoInit("atom:tag_min_max"));
// scalar subviews: element 0 is used as the min, element 1 as the max
d_tag_min = Kokkos::subview(d_tag_min_max,0);
d_tag_max = Kokkos::subview(d_tag_min_max,1);
h_tag_min = Kokkos::subview(h_tag_min_max,0);
h_tag_max = Kokkos::subview(h_tag_min_max,1);
}
/* ---------------------------------------------------------------------- */

View File

@ -15,6 +15,8 @@
#include "atom.h" // IWYU pragma: export
#include "kokkos_type.h"
#include <Kokkos_Sort.hpp>
#ifndef LMP_ATOM_KOKKOS_H
#define LMP_ATOM_KOKKOS_H
@ -73,19 +75,50 @@ class AtomKokkos : public Atom {
~AtomKokkos() override;
void map_init(int check = 1) override;
void map_clear() override;
void map_set() override;
void map_one(tagint, int) override;
void map_delete() override;
int map_find_hash(tagint) override;
DAT::tdual_int_scalar k_error_flag;
DAT::tdual_int_1d k_sametag;
DAT::tdual_int_1d k_map_array;
DAT::tdual_int_scalar k_error_flag;
dual_hash_type k_map_hash;
DAT::t_tagint_1d d_tag_sorted;
DAT::t_int_1d d_i_sorted;
typedef Kokkos::DualView<tagint[2], LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_2;
typedef tdual_tagint_2::t_dev t_tagint_2;
typedef tdual_tagint_2::t_host t_host_tagint_2;
t_tagint_2 d_tag_min_max;
t_host_tagint_2 h_tag_min_max;
DAT::t_tagint_scalar d_tag_min,d_tag_max;
HAT::t_tagint_scalar h_tag_min,h_tag_max;
using MapKeyViewType = decltype(d_tag_sorted);
using BinOpMap = Kokkos::BinOp1D<MapKeyViewType>;
Kokkos::BinSort<MapKeyViewType, BinOpMap> Sorter;
class AtomVecKokkos* avecKK;
// map lookup function inlined for efficiency
// return -1 if no map defined
inline int map(tagint global) override
{
  // dispatch on the map style: 1 = array lookup, 2 = hash lookup
  switch (map_style) {
    case 1:
      // make sure the host copy of the array is current before reading it
      k_map_array.sync_host();
      return map_array[global];
    case 2:
      return map_find_hash(global);
    default:
      // no map defined
      return -1;
  }
};
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
static int map_kokkos(tagint global, int map_style, const DAT::tdual_int_1d &k_map_array, const dual_hash_type &k_map_hash)

View File

@ -34,8 +34,6 @@ using namespace LAMMPS_NS;
set entire array to -1 as initial values
for hash option:
map_nhash = length of hash table
map_nbucket = # of hash buckets, prime larger than map_nhash * 2
so buckets will only be filled with 0 or 1 atoms on average
------------------------------------------------------------------------- */
void AtomKokkos::map_init(int check)
@ -58,15 +56,7 @@ void AtomKokkos::map_init(int check)
// for hash, set all buckets to empty, put all entries in free list
if (!recreate) {
if (map_style == MAP_ARRAY) {
for (int i = 0; i <= map_tag_max; i++) map_array[i] = -1;
} else {
for (int i = 0; i < map_nbucket; i++) map_bucket[i] = -1;
map_nused = 0;
map_free = 0;
for (int i = 0; i < map_nhash; i++) map_hash[i].next = i + 1;
if (map_nhash > 0) map_hash[map_nhash - 1].next = -1;
}
map_clear();
// recreating: delete old map and create new one for array or hash
@ -76,7 +66,8 @@ void AtomKokkos::map_init(int check)
if (map_style == MAP_ARRAY) {
map_maxarray = map_tag_max;
memoryKK->create_kokkos(k_map_array, map_array, map_maxarray + 1, "atom:map_array");
for (int i = 0; i <= map_tag_max; i++) map_array[i] = -1;
Kokkos::deep_copy(k_map_array.d_view,-1);
k_map_array.modify_device();
} else {
@ -90,35 +81,26 @@ void AtomKokkos::map_init(int check)
map_nhash *= 2;
map_nhash = MAX(map_nhash, 1000);
// map_nbucket = prime just larger than map_nhash
// next_prime() should be fast enough,
// about 10% of odd integers are prime above 1M
map_nbucket = next_prime(map_nhash);
// set all buckets to empty
// set hash to map_nhash in length
// put all hash entries in free list and point them to each other
map_bucket = new int[map_nbucket];
for (int i = 0; i < map_nbucket; i++) map_bucket[i] = -1;
map_hash = new HashElem[map_nhash];
map_nused = 0;
map_free = 0;
for (int i = 0; i < map_nhash; i++) map_hash[i].next = i + 1;
map_hash[map_nhash - 1].next = -1;
// use "view" template method to avoid unnecessary deep_copy
auto h_map_hash = k_map_hash.view<LMPHostType>(); // get type
h_map_hash = decltype(h_map_hash)(map_nhash);
k_map_hash.view<LMPHostType>() = h_map_hash;
k_map_hash = dual_hash_type(map_nhash);
}
}
}
k_sametag.modify_host();
if (map_style == Atom::MAP_ARRAY) k_map_array.modify_host();
/* ----------------------------------------------------------------------
clear global -> local map for all of my own and ghost atoms
for hash table option:
global ID may not be in table if image atom was already cleared
------------------------------------------------------------------------- */
void AtomKokkos::map_clear()
{
  // any style other than the array map: empty the device hash and flag it
  if (map_style != Atom::MAP_ARRAY) {
    k_map_hash.d_view.clear();
    k_map_hash.modify_device();
    return;
  }

  // array map: reset every device entry to -1 and flag the device copy
  Kokkos::deep_copy(k_map_array.d_view,-1);
  k_map_array.modify_device();
}
/* ----------------------------------------------------------------------
@ -135,28 +117,16 @@ void AtomKokkos::map_set()
{
int nall = nlocal + nghost;
atomKK->sync(Host, TAG_MASK);
// possible reallocation of sametag must come before loop over atoms
// since loop sets sametag
k_sametag.sync_host();
if (map_style == Atom::MAP_ARRAY) k_map_array.sync_host();
if (nall > max_same) {
max_same = nall + EXTRA;
memoryKK->destroy_kokkos(k_sametag, sametag);
memoryKK->create_kokkos(k_sametag, sametag, max_same, "atom:sametag");
}
if (map_style == MAP_ARRAY) {
// possible reallocation of sametag must come before loop over atoms
// since loop sets sametag
if (nall > max_same) {
max_same = nall + EXTRA;
memoryKK->destroy_kokkos(k_sametag, sametag);
memoryKK->create_kokkos(k_sametag, sametag, max_same, "atom:sametag");
}
for (int i = nall - 1; i >= 0; i--) {
sametag[i] = map_array[tag[i]];
map_array[tag[i]] = i;
}
} else {
if (map_style == MAP_HASH) {
// if this proc has more atoms than hash table size, call map_init()
// call with 0 since max atomID in system has not changed
@ -164,107 +134,191 @@ void AtomKokkos::map_set()
// b/c map_init() may invoke map_delete(), whacking sametag
if (nall > map_nhash) map_init(0);
if (nall > max_same) {
max_same = nall + EXTRA;
memoryKK->destroy_kokkos(k_sametag, sametag);
memoryKK->create_kokkos(k_sametag, sametag, max_same, "atom:sametag");
}
int previous, ibucket, index;
tagint global;
for (int i = nall - 1; i >= 0; i--) {
sametag[i] = map_find_hash(tag[i]);
// search for key
// if found it, just overwrite local value with index
previous = -1;
global = tag[i];
ibucket = global % map_nbucket;
index = map_bucket[ibucket];
while (index > -1) {
if (map_hash[index].global == global) break;
previous = index;
index = map_hash[index].next;
}
if (index > -1) {
map_hash[index].local = i;
continue;
}
// take one entry from free list
// add the new global/local pair as entry at end of bucket list
// special logic if this entry is 1st in bucket
index = map_free;
map_free = map_hash[map_free].next;
if (previous == -1)
map_bucket[ibucket] = index;
else
map_hash[previous].next = index;
map_hash[index].global = global;
map_hash[index].local = i;
map_hash[index].next = -1;
map_nused++;
}
// Copy to Kokkos hash
// use "view" template method to avoid unnecessary deep_copy
auto h_map_hash = k_map_hash.view<LMPHostType>();
h_map_hash.clear();
for (int i = nall - 1; i >= 0; i--) {
// search for key
// if don't find it, done
previous = -1;
global = tag[i];
ibucket = global % map_nbucket;
index = map_bucket[ibucket];
while (index > -1) {
if (map_hash[index].global == global) break;
previous = index;
index = map_hash[index].next;
}
if (index == -1) continue;
int local = map_hash[index].local;
auto insert_result = h_map_hash.insert(global, local);
if (insert_result.failed()) error->one(FLERR, "Kokkos::UnorderedMap insertion failed");
}
}
k_sametag.modify_host();
if (map_style == Atom::MAP_ARRAY)
k_map_array.modify_host();
else if (map_style == Atom::MAP_HASH) {
atomKK->sync(Device, TAG_MASK);
// use "view" template method to avoid unnecessary deep_copy
auto d_tag = atomKK->k_tag.d_view;
auto d_sametag = k_sametag.d_view;
auto h_map_hash = k_map_hash.view<LMPHostType>();
auto d_map_hash = k_map_hash.view<LMPDeviceType>();
// sort by tag
// check if fix shake or neigh bond needs a device hash
int nmax = atom->nmax;
int device_hash_flag = 0;
auto neighborKK = (NeighborKokkos *) neighbor;
if (neighborKK->device_flag) device_hash_flag = 1;
for (int n = 0; n < modify->nfix; n++)
if (utils::strmatch(modify->fix[n]->style, "^shake"))
if (modify->fix[n]->execution_space == Device) device_hash_flag = 1;
if (device_hash_flag) {
Kokkos::deep_copy(d_map_hash, h_map_hash);
k_map_hash.view<LMPDeviceType>() = d_map_hash;
}
int realloc_flag = 0;
if (d_tag_sorted.extent(0) < nmax) {
MemKK::realloc_kokkos(d_tag_sorted,"atom:tag_sorted",nmax);
MemKK::realloc_kokkos(d_i_sorted,"atom:i_sorted",nmax);
realloc_flag = 1;
}
h_tag_min() = MAXTAGINT;
h_tag_max() = 0;
Kokkos::deep_copy(d_tag_min_max,h_tag_min_max);
auto l_tag_sorted = d_tag_sorted;
auto l_i_sorted = d_i_sorted;
auto l_tag_min = d_tag_min;
auto l_tag_max = d_tag_max;
int map_style_array = (map_style == MAP_ARRAY);
Kokkos::parallel_for(nall, LAMMPS_LAMBDA(int i) {
l_i_sorted(i) = i;
tagint tag_i = d_tag(i);
l_tag_sorted(i) = tag_i;
Kokkos::atomic_min(&l_tag_min(),tag_i);
Kokkos::atomic_max(&l_tag_max(),tag_i);
});
Kokkos::deep_copy(h_tag_min_max,d_tag_min_max);
tagint min = h_tag_min();
tagint max = h_tag_max();
using MapKeyViewType = decltype(d_tag_sorted);
using BinOpMap = Kokkos::BinOp1D<MapKeyViewType>;
auto binner = BinOpMap(nall, min, max);
if (!Sorter.bin_offsets.data() || realloc_flag) {
Sorter = Kokkos::BinSort<MapKeyViewType, BinOpMap>(d_tag_sorted, 0, nall, binner, true);
MemKK::realloc_kokkos(Sorter.bin_count_atomic,"Kokkos::SortImpl::BinSortFunctor::bin_count",nmax+1);
Kokkos::deep_copy(Sorter.bin_count_atomic,0);
Sorter.bin_count_const = Sorter.bin_count_atomic;
MemKK::realloc_kokkos(Sorter.bin_offsets,"Kokkos::SortImpl::BinSortFunctor::bin_offsets",nmax+1);
MemKK::realloc_kokkos(Sorter.sort_order,"Kokkos::SortImpl::BinSortFunctor::sort_order",nmax);
} else {
Kokkos::deep_copy(Sorter.bin_count_atomic,0);
Sorter.bin_op = binner;
Sorter.range_begin = 0;
Sorter.range_end = nall;
}
Sorter.create_permute_vector(LMPDeviceType());
Sorter.sort(LMPDeviceType(), d_tag_sorted, 0, nall);
Sorter.sort(LMPDeviceType(), d_i_sorted, 0, nall);
auto d_map_array = k_map_array.d_view;
auto d_map_hash = k_map_hash.d_view;
d_map_hash.clear();
auto d_error_flag = k_error_flag.d_view;
Kokkos::deep_copy(d_error_flag,0);
// for each tag find:
// neighboring atoms with closest local id for sametag
// atom with smallest local id for atom map
Kokkos::parallel_for(nall, LAMMPS_LAMBDA(int ii) {
const int i = l_i_sorted(ii);
const tagint tag_i = l_tag_sorted(ii);
int i_min = i;
int i_closest = MAXTAGINT;
// search atoms with same tag in the forward direction
int jj = ii+1;
int closest_flag = 0;
while (jj < nall) {
const tagint tag_j = l_tag_sorted(jj);
if (tag_j != tag_i) break;
const int j = l_i_sorted(jj);
i_min = MIN(i_min,j);
if (j > i) {
i_closest = MIN(i_closest,j);
closest_flag = 1;
}
jj++;
}
// search atoms with same tag in the reverse direction
jj = ii-1;
while (jj >= 0) {
const tagint tag_j = l_tag_sorted(jj);
if (tag_j != tag_i) break;
const int j = l_i_sorted(jj);
i_min = MIN(i_min,j);
if (j > i) {
i_closest = MIN(i_closest,j);
closest_flag = 1;
}
jj--;
}
if (!closest_flag)
i_closest = -1;
d_sametag(i) = i_closest;
if (i == i_min) {
if (map_style_array)
d_map_array(tag_i) = i_min;
else {
auto insert_result = d_map_hash.insert(tag_i, i_min);
if (insert_result.failed()) d_error_flag() = 1;
}
}
});
auto h_error_flag = k_error_flag.h_view;
Kokkos::deep_copy(h_error_flag,d_error_flag);
if (h_error_flag())
error->one(FLERR,"Failed to insert into Kokkos hash atom map");
k_sametag.modify_device();
if (map_style == MAP_ARRAY)
k_map_array.modify_device();
else
k_map_hash.modify_device();
}
/* ----------------------------------------------------------------------
set global to local map for one atom
for hash table option:
global ID may already be in table if atom was already set
called by Special class
------------------------------------------------------------------------- */
// Set the global -> local mapping for a single atom on the host copy of
// the map.
//   global : global atom ID used as array index / hash key
//   local  : local index stored for that ID
// For the hash map an already-present key has its value overwritten; a
// failed insertion is reported through error->one().
// NOTE(review): the host copy is written here without a modify_host() call;
// confirm that callers do not depend on a later sync to the device.
void AtomKokkos::map_one(tagint global, int local)
{
  if (map_style == MAP_ARRAY) {
    k_map_array.sync_host();
    k_map_array.h_view[global] = local;
  } else {
    k_map_hash.sync_host();
    auto& h_map_hash = k_map_hash.h_view;

    auto insert_result = h_map_hash.insert(global, local);

    // the insert result already carries the slot of an existing key,
    // so overwrite in place instead of doing a second lookup
    if (insert_result.existing())
      h_map_hash.value_at(insert_result.index()) = local;
    else if (insert_result.failed())
      error->one(FLERR,"Failed to insert into Kokkos hash atom map");
  }
}
/* ----------------------------------------------------------------------
lookup global ID in hash table, return local index
called by map() in atom.h
------------------------------------------------------------------------- */
int AtomKokkos::map_find_hash(tagint global)
{
  // bring the host copy of the hash up to date before searching it
  k_map_hash.sync_host();
  auto& host_map = k_map_hash.h_view;

  // find() yields a slot index; only a valid slot holds a stored value
  auto slot = host_map.find(global);
  if (!host_map.valid_at(slot)) return -1;

  int local = host_map.value_at(slot);
  return local;
}
/* ----------------------------------------------------------------------
@ -279,10 +333,6 @@ void AtomKokkos::map_delete()
if (map_style == MAP_ARRAY) {
memoryKK->destroy_kokkos(k_map_array, map_array);
map_array = nullptr;
} else {
k_map_hash.h_view = host_hash_type();
k_map_hash.d_view = hash_type();
}
Atom::map_delete();
} else
k_map_hash = dual_hash_type();
}

View File

@ -35,7 +35,7 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp)
unpack_exchange_indices_flag = 0;
size_exchange = 0;
k_count = DAT::tdual_int_1d("atom::k_count",1);
k_count = DAT::tdual_int_1d("atom:k_count",1);
atomKK = (AtomKokkos *) atom;
commKK = (CommKokkos *) comm;
}

View File

@ -43,13 +43,8 @@ BondFENEKokkos<DeviceType>::BondFENEKokkos(LAMMPS *lmp) : BondFENE(lmp)
datamask_read = X_MASK | F_MASK | ENERGY_MASK | VIRIAL_MASK;
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
k_warning_flag = DAT::tdual_int_scalar("Bond:warning_flag");
d_warning_flag = k_warning_flag.view<DeviceType>();
h_warning_flag = k_warning_flag.h_view;
k_error_flag = DAT::tdual_int_scalar("Bond:error_flag");
d_error_flag = k_error_flag.view<DeviceType>();
h_error_flag = k_error_flag.h_view;
d_flag = typename AT::t_int_scalar("bond:flag");
h_flag = HAT::t_int_scalar("bond:flag_mirror");
}
/* ---------------------------------------------------------------------- */
@ -99,13 +94,7 @@ void BondFENEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
nlocal = atom->nlocal;
newton_bond = force->newton_bond;
h_warning_flag() = 0;
k_warning_flag.template modify<LMPHostType>();
k_warning_flag.template sync<DeviceType>();
h_error_flag() = 0;
k_error_flag.template modify<LMPHostType>();
k_error_flag.template sync<DeviceType>();
Kokkos::deep_copy(d_flag,0);
copymode = 1;
@ -127,14 +116,11 @@ void BondFENEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
}
}
k_warning_flag.template modify<DeviceType>();
k_warning_flag.template sync<LMPHostType>();
if (h_warning_flag())
error->warning(FLERR,"FENE bond too long");
Kokkos::deep_copy(h_flag,d_flag);
k_error_flag.template modify<DeviceType>();
k_error_flag.template sync<LMPHostType>();
if (h_error_flag())
if (h_flag() == 1)
error->warning(FLERR,"FENE bond too long");
else if (h_flag() == 2)
error->one(FLERR,"Bad FENE bond");
if (eflag_global) energy += ev.evdwl;
@ -165,8 +151,6 @@ template<int NEWTON_BOND, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void BondFENEKokkos<DeviceType>::operator()(TagBondFENECompute<NEWTON_BOND,EVFLAG>, const int &n, EV_FLOAT& ev) const {
if (d_error_flag()) return;
// The f array is atomic
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > a_f = f;
@ -178,10 +162,15 @@ void BondFENEKokkos<DeviceType>::operator()(TagBondFENECompute<NEWTON_BOND,EVFLA
const F_FLOAT dely = x(i1,1) - x(i2,1);
const F_FLOAT delz = x(i1,2) - x(i2,2);
const F_FLOAT r0 = d_r0[type];
const F_FLOAT k = d_k[type];
const F_FLOAT sigma = d_sigma[type];
const F_FLOAT epsilon = d_epsilon[type];
// force from log term
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
const F_FLOAT r0sq = d_r0[type] * d_r0[type];
const F_FLOAT r0sq = r0 * r0;
F_FLOAT rlogarg = 1.0 - rsq/r0sq;
// if r -> r0, then rlogarg < 0.0 which is an error
@ -189,31 +178,32 @@ void BondFENEKokkos<DeviceType>::operator()(TagBondFENECompute<NEWTON_BOND,EVFLA
// if r > 2*r0 something serious is wrong, abort
if (rlogarg < 0.1) {
if (!d_warning_flag())
d_warning_flag() = 1;
if (rlogarg <= -3.0 && !d_error_flag())
d_error_flag() = 1;
if (rlogarg <= -3.0)
d_flag() = 2;
else
d_flag() = 1;
rlogarg = 0.1;
}
F_FLOAT fbond = -d_k[type]/rlogarg;
F_FLOAT fbond = -k/rlogarg;
// force from LJ term
F_FLOAT sr6 = 0.0;
if (rsq < MY_CUBEROOT2*d_sigma[type]*d_sigma[type]) {
const F_FLOAT sr2 = d_sigma[type]*d_sigma[type]/rsq;
F_FLOAT sigma2 = sigma*sigma;
if (rsq < MY_CUBEROOT2*sigma2) {
const F_FLOAT sr2 = sigma2/rsq;
sr6 = sr2*sr2*sr2;
fbond += 48.0*d_epsilon[type]*sr6*(sr6-0.5)/rsq;
fbond += 48.0*epsilon*sr6*(sr6-0.5)/rsq;
}
// energy
F_FLOAT ebond = 0.0;
if (eflag) {
ebond = -0.5 * d_k[type]*r0sq*log(rlogarg);
if (rsq < MY_CUBEROOT2*d_sigma[type]*d_sigma[type])
ebond += 4.0*d_epsilon[type]*sr6*(sr6-1.0) + d_epsilon[type];
ebond = -0.5 * k*r0sq*log(rlogarg);
if (rsq < MY_CUBEROOT2*sigma2)
ebond += 4.0*epsilon*sr6*(sr6-1.0) + epsilon;
}
// apply force to each of 2 atoms

View File

@ -71,13 +71,8 @@ class BondFENEKokkos : public BondFENE {
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
DAT::tdual_int_scalar k_warning_flag;
typename AT::t_int_scalar d_warning_flag;
HAT::t_int_scalar h_warning_flag;
DAT::tdual_int_scalar k_error_flag;
typename AT::t_int_scalar d_error_flag;
HAT::t_int_scalar h_error_flag;
typename AT::t_int_scalar d_flag;
HAT::t_int_scalar h_flag;
int nlocal,newton_bond;
int eflag,vflag;

View File

@ -729,13 +729,8 @@ void CommKokkos::exchange_device()
double lo,hi;
MPI_Request request;
// clear global->local map for owned and ghost atoms
// b/c atoms migrate to new procs in exchange() and
// new ghosts are created in borders()
// map_set() is done at end of borders()
// clear ghost count and any ghost bonus data internal to AtomVec
if (map_style != Atom::MAP_NONE) atom->map_clear();
atom->nghost = 0;
atom->avec->clear_bonus();
@ -1275,10 +1270,8 @@ void CommKokkos::borders_device() {
// reset global->local map
if (map_style != Atom::MAP_NONE) {
atomKK->sync(Host,TAG_MASK);
if (map_style != Atom::MAP_NONE)
atom->map_set();
}
}
/* ----------------------------------------------------------------------

View File

@ -571,9 +571,11 @@ void DomainKokkos::lamda2x(int n)
// Per-index body for the TagDomain_lamda2x tag: overwrites x(i,0..2) with a
// combination of the current coordinates weighted by h[] plus boxlo[].
// NOTE(review): the update appears twice in this span (direct x(i,*) reads
// first, cached xi1/xi2 second); the text looks like a rendered diff, so
// presumably only one form exists in the real file -- confirm.
KOKKOS_INLINE_FUNCTION
void DomainKokkos::operator()(TagDomain_lamda2x, const int &i) const {
x(i,0) = h[0]*x(i,0) + h[5]*x(i,1) + h[4]*x(i,2) + boxlo[0];
x(i,1) = h[1]*x(i,1) + h[3]*x(i,2) + boxlo[1];
x(i,2) = h[2]*x(i,2) + boxlo[2];
// read x(i,1) and x(i,2) once; the three assignments below reuse the locals
const double xi1 = x(i,1);
const double xi2 = x(i,2);
x(i,0) = h[0]*x(i,0) + h[5]*xi1 + h[4]*xi2 + boxlo[0];
x(i,1) = h[1]*xi1 + h[3]*xi2 + boxlo[1];
x(i,2) = h[2]*xi2 + boxlo[2];
}
/* ----------------------------------------------------------------------

View File

@ -684,8 +684,6 @@ void FixLangevinKokkos<DeviceType>::zero_force_item(int i) const
template<class DeviceType>
void FixLangevinKokkos<DeviceType>::compute_target()
{
atomKK->sync(Host, MASK_MASK);
mask = atomKK->k_mask.template view<DeviceType>();
int nlocal = atomKK->nlocal;
double delta = update->ntimestep - update->beginstep;
@ -710,12 +708,14 @@ void FixLangevinKokkos<DeviceType>::compute_target()
memoryKK->destroy_kokkos(k_tforce,tforce);
memoryKK->create_kokkos(k_tforce,tforce,maxatom2,"langevin:tforce");
d_tforce = k_tforce.template view<DeviceType>();
h_tforce = k_tforce.template view<LMPHostType>();
h_tforce = k_tforce.h_view;
}
input->variable->compute_atom(tvar,igroup,tforce,1,0); // tforce is modified on host
k_tforce.template modify<LMPHostType>();
k_tforce.modify_host();
atomKK->sync(Host, MASK_MASK);
auto h_mask = atomKK->k_mask.h_view;
for (int i = 0; i < nlocal; i++)
if (mask[i] & groupbit)
if (h_mask[i] & groupbit)
if (h_tforce[i] < 0.0)
error->one(FLERR,
"Fix langevin variable returned negative temperature");

View File

@ -221,6 +221,7 @@ void FixShakeKokkos<DeviceType>::pre_neighbor()
k_map_array.template sync<DeviceType>();
} else if (map_style == Atom::MAP_HASH) {
k_map_hash = atomKK->k_map_hash;
k_map_hash.template sync<DeviceType>();
}
k_shake_flag.sync<DeviceType>();
@ -248,6 +249,7 @@ void FixShakeKokkos<DeviceType>::pre_neighbor()
k_map_array.template sync<DeviceType>();
} else if (map_style == Atom::MAP_HASH) {
k_map_hash = atomKK->k_map_hash;
k_map_hash.template sync<DeviceType>();
}
atomKK->k_sametag.sync<DeviceType>();
@ -357,6 +359,7 @@ void FixShakeKokkos<DeviceType>::post_force(int vflag)
k_map_array.template sync<DeviceType>();
} else if (map_style == Atom::MAP_HASH) {
k_map_hash = atomKK->k_map_hash;
k_map_hash.template sync<DeviceType>();
}
if (d_rmass.data())

View File

@ -570,6 +570,21 @@ struct dual_hash_type {
hash_type d_view;
host_hash_type h_view;
bool modified_device;
bool modified_host;
dual_hash_type() {
modified_device = modified_host = false;
d_view = hash_type();
h_view = host_hash_type();
}
dual_hash_type(int capacity) {
modified_device = modified_host = false;
d_view = hash_type(capacity);
h_view = host_hash_type(capacity);
}
// Overload selected when DeviceType is LMPDeviceType or the
// SpaceAccessibility check below is true: hands back the device map.
template<class DeviceType>
std::enable_if_t<
    (std::is_same<DeviceType,LMPDeviceType>::value ||
     Kokkos::SpaceAccessibility<LMPDeviceType::memory_space,LMPHostType::memory_space>::accessible),
    hash_type&>
view()
{
  return d_view;
}
@ -584,6 +599,42 @@ struct dual_hash_type {
KOKKOS_INLINE_FUNCTION
std::enable_if_t<!(std::is_same<DeviceType,LMPDeviceType>::value || Kokkos::SpaceAccessibility<LMPDeviceType::memory_space,LMPHostType::memory_space>::accessible),const host_hash_type&> const_view() const {return h_view;}
// Flag the device map as modified; aborts when the host map is flagged too.
void modify_device()
{
  modified_device = true;
  // modified_device was just set, so only the host flag decides
  if (modified_host)
    Kokkos::abort("Concurrent modification of host and device hashes");
}
// Flag the host map as modified; aborts when the device map is flagged too.
void modify_host()
{
  modified_host = true;
  // modified_host was just set, so only the device flag decides
  if (modified_device)
    Kokkos::abort("Concurrent modification of host and device hashes");
}
// Copy the host map to the device map, but only if the host was flagged as
// modified; clears the host flag afterwards.
void sync_device()
{
  if (!modified_host) return;

  Kokkos::deep_copy(d_view,h_view);
  modified_host = false;
}
// Copy the device map to the host map, but only if the device was flagged
// as modified; clears the device flag afterwards.
void sync_host()
{
  if (!modified_device) return;

  Kokkos::deep_copy(h_view,d_view);
  modified_device = false;
}
// Overload selected when DeviceType is LMPDeviceType or the
// SpaceAccessibility check below is true: forwards to sync_device().
template<class DeviceType>
std::enable_if_t<
    (std::is_same<DeviceType,LMPDeviceType>::value ||
     Kokkos::SpaceAccessibility<LMPDeviceType::memory_space,LMPHostType::memory_space>::accessible),
    void>
sync()
{
  sync_device();
}
// Overload selected when DeviceType is not LMPDeviceType and the
// SpaceAccessibility check below is false: forwards to sync_host().
template<class DeviceType>
std::enable_if_t<
    !(std::is_same<DeviceType,LMPDeviceType>::value ||
      Kokkos::SpaceAccessibility<LMPDeviceType::memory_space,LMPHostType::memory_space>::accessible),
    void>
sync()
{
  sync_host();
}
};
template <class DeviceType>
@ -601,6 +652,13 @@ typedef tdual_int_scalar::t_dev_const t_int_scalar_const;
typedef tdual_int_scalar::t_dev_um t_int_scalar_um;
typedef tdual_int_scalar::t_dev_const_um t_int_scalar_const_um;
typedef Kokkos::
DualView<LAMMPS_NS::tagint, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_scalar;
typedef tdual_tagint_scalar::t_dev t_tagint_scalar;
typedef tdual_tagint_scalar::t_dev_const t_tagint_scalar_const;
typedef tdual_tagint_scalar::t_dev_um t_tagint_scalar_um;
typedef tdual_tagint_scalar::t_dev_const_um t_tagint_scalar_const_um;
typedef Kokkos::
DualView<LMP_FLOAT, LMPDeviceType::array_layout, LMPDeviceType>
tdual_float_scalar;
@ -919,6 +977,12 @@ typedef tdual_int_scalar::t_host_const t_int_scalar_const;
typedef tdual_int_scalar::t_host_um t_int_scalar_um;
typedef tdual_int_scalar::t_host_const_um t_int_scalar_const_um;
typedef Kokkos::DualView<LAMMPS_NS::tagint, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_scalar;
typedef tdual_tagint_scalar::t_host t_tagint_scalar;
typedef tdual_tagint_scalar::t_host_const t_tagint_scalar_const;
typedef tdual_tagint_scalar::t_host_um t_tagint_scalar_um;
typedef tdual_tagint_scalar::t_host_const_um t_tagint_scalar_const_um;
typedef Kokkos::DualView<LMP_FLOAT, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_scalar;
typedef tdual_float_scalar::t_host t_float_scalar;
typedef tdual_float_scalar::t_host_const t_float_scalar_const;

View File

@ -50,13 +50,16 @@ NeighBondKokkos<DeviceType>::NeighBondKokkos(LAMMPS *lmp) : Pointers(lmp)
datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK;
k_nlist = DAT::tdual_int_scalar("NeighBond:nlist");
d_nlist = k_nlist.view<DeviceType>();
h_nlist = k_nlist.h_view;
// use 1D view for scalars to reduce GPU memory operations
k_fail_flag = DAT::tdual_int_scalar("NeighBond:fail_flag");
d_fail_flag = k_fail_flag.view<DeviceType>();
h_fail_flag = k_fail_flag.h_view;
d_scalars = typename AT::t_int_1d("NeighBond:scalars",2);
h_scalars = HAT::t_int_1d("NeighBond:scalars_mirror",2);
d_nlist = Kokkos::subview(d_scalars,0);
d_fail_flag = Kokkos::subview(d_scalars,1);
h_nlist = Kokkos::subview(h_scalars,0);
h_fail_flag = Kokkos::subview(h_scalars,1);
maxbond = 0;
maxangle = 0;
@ -240,22 +243,14 @@ void NeighBondKokkos<DeviceType>::bond_all()
do {
nmissing = 0;
h_nlist() = 0;
k_nlist.template modify<LMPHostType>();
k_nlist.template sync<DeviceType>();
h_fail_flag() = 0;
k_fail_flag.template modify<LMPHostType>();
k_fail_flag.template sync<DeviceType>();
Kokkos::deep_copy(d_scalars,0);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondBondAll>(0,nlocal),*this,nmissing);
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
Kokkos::deep_copy(h_scalars,d_scalars);
neighbor->nbondlist = h_nlist();
k_fail_flag.template modify<DeviceType>();
k_fail_flag.template sync<LMPHostType>();
if (h_fail_flag()) {
maxbond = neighbor->nbondlist + BONDDELTA;
memoryKK->grow_kokkos(k_bondlist,neighbor->bondlist,maxbond,3,"neighbor:neighbor->bondlist");
@ -327,22 +322,14 @@ void NeighBondKokkos<DeviceType>::bond_partial()
do {
nmissing = 0;
h_nlist() = 0;
k_nlist.template modify<LMPHostType>();
k_nlist.template sync<DeviceType>();
h_fail_flag() = 0;
k_fail_flag.template modify<LMPHostType>();
k_fail_flag.template sync<DeviceType>();
Kokkos::deep_copy(d_scalars,0);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondBondPartial>(0,nlocal),*this,nmissing);
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
Kokkos::deep_copy(h_scalars,d_scalars);
neighbor->nbondlist = h_nlist();
k_fail_flag.template modify<DeviceType>();
k_fail_flag.template sync<LMPHostType>();
if (h_fail_flag()) {
maxbond = neighbor->nbondlist + BONDDELTA;
memoryKK->grow_kokkos(k_bondlist,neighbor->bondlist,maxbond,3,"neighbor:neighbor->bondlist");
@ -440,22 +427,14 @@ void NeighBondKokkos<DeviceType>::angle_all()
do {
nmissing = 0;
h_nlist() = 0;
k_nlist.template modify<LMPHostType>();
k_nlist.template sync<DeviceType>();
h_fail_flag() = 0;
k_fail_flag.template modify<LMPHostType>();
k_fail_flag.template sync<DeviceType>();
Kokkos::deep_copy(d_scalars,0);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondAngleAll>(0,nlocal),*this,nmissing);
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
Kokkos::deep_copy(h_scalars,d_scalars);
neighbor->nanglelist = h_nlist();
k_fail_flag.template modify<DeviceType>();
k_fail_flag.template sync<LMPHostType>();
if (h_fail_flag()) {
maxangle = neighbor->nanglelist + BONDDELTA;
memoryKK->grow_kokkos(k_anglelist,neighbor->anglelist,maxangle,4,"neighbor:neighbor->anglelist");
@ -534,22 +513,14 @@ void NeighBondKokkos<DeviceType>::angle_partial()
do {
nmissing = 0;
h_nlist() = 0;
k_nlist.template modify<LMPHostType>();
k_nlist.template sync<DeviceType>();
h_fail_flag() = 0;
k_fail_flag.template modify<LMPHostType>();
k_fail_flag.template sync<DeviceType>();
Kokkos::deep_copy(d_scalars,0);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondAnglePartial>(0,nlocal),*this,nmissing);
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
Kokkos::deep_copy(h_scalars,d_scalars);
neighbor->nanglelist = h_nlist();
k_fail_flag.template modify<DeviceType>();
k_fail_flag.template sync<LMPHostType>();
if (h_fail_flag()) {
maxangle = neighbor->nanglelist + BONDDELTA;
memoryKK->grow_kokkos(k_anglelist,neighbor->anglelist,maxangle,4,"neighbor:neighbor->anglelist");
@ -667,22 +638,14 @@ void NeighBondKokkos<DeviceType>::dihedral_all()
do {
nmissing = 0;
h_nlist() = 0;
k_nlist.template modify<LMPHostType>();
k_nlist.template sync<DeviceType>();
h_fail_flag() = 0;
k_fail_flag.template modify<LMPHostType>();
k_fail_flag.template sync<DeviceType>();
Kokkos::deep_copy(d_scalars,0);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondDihedralAll>(0,nlocal),*this,nmissing);
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
Kokkos::deep_copy(h_scalars,d_scalars);
neighbor->ndihedrallist = h_nlist();
k_fail_flag.template modify<DeviceType>();
k_fail_flag.template sync<LMPHostType>();
if (h_fail_flag()) {
maxdihedral = neighbor->ndihedrallist + BONDDELTA;
memoryKK->grow_kokkos(k_dihedrallist,neighbor->dihedrallist,maxdihedral,5,"neighbor:neighbor->dihedrallist");
@ -766,22 +729,14 @@ void NeighBondKokkos<DeviceType>::dihedral_partial()
do {
nmissing = 0;
h_nlist() = 0;
k_nlist.template modify<LMPHostType>();
k_nlist.template sync<DeviceType>();
h_fail_flag() = 0;
k_fail_flag.template modify<LMPHostType>();
k_fail_flag.template sync<DeviceType>();
Kokkos::deep_copy(d_scalars,0);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondDihedralPartial>(0,nlocal),*this,nmissing);
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
Kokkos::deep_copy(h_scalars,d_scalars);
neighbor->ndihedrallist = h_nlist();
k_fail_flag.template modify<DeviceType>();
k_fail_flag.template sync<LMPHostType>();
if (h_fail_flag()) {
maxdihedral = neighbor->ndihedrallist + BONDDELTA;
memoryKK->grow_kokkos(k_dihedrallist,neighbor->dihedrallist,maxdihedral,5,"neighbor:neighbor->dihedrallist");
@ -921,22 +876,14 @@ void NeighBondKokkos<DeviceType>::improper_all()
do {
nmissing = 0;
h_nlist() = 0;
k_nlist.template modify<LMPHostType>();
k_nlist.template sync<DeviceType>();
h_fail_flag() = 0;
k_fail_flag.template modify<LMPHostType>();
k_fail_flag.template sync<DeviceType>();
Kokkos::deep_copy(d_scalars,0);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondImproperAll>(0,nlocal),*this,nmissing);
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
Kokkos::deep_copy(h_scalars,d_scalars);
neighbor->nimproperlist = h_nlist();
k_fail_flag.template modify<DeviceType>();
k_fail_flag.template sync<LMPHostType>();
if (h_fail_flag()) {
maximproper = neighbor->nimproperlist + BONDDELTA;
memoryKK->grow_kokkos(k_improperlist,neighbor->improperlist,maximproper,5,"neighbor:neighbor->improperlist");
@ -1020,22 +967,14 @@ void NeighBondKokkos<DeviceType>::improper_partial()
do {
nmissing = 0;
h_nlist() = 0;
k_nlist.template modify<LMPHostType>();
k_nlist.template sync<DeviceType>();
h_fail_flag() = 0;
k_fail_flag.template modify<LMPHostType>();
k_fail_flag.template sync<DeviceType>();
Kokkos::deep_copy(d_scalars,0);
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondImproperPartial>(0,nlocal),*this,nmissing);
k_nlist.template modify<DeviceType>();
k_nlist.template sync<LMPHostType>();
Kokkos::deep_copy(h_scalars,d_scalars);
neighbor->nimproperlist = h_nlist();
k_fail_flag.template modify<DeviceType>();
k_fail_flag.template sync<LMPHostType>();
if (h_fail_flag()) {
maximproper = neighbor->nimproperlist + BONDDELTA;
memoryKK->grow_kokkos(k_improperlist,neighbor->improperlist,maximproper,5,"neighbor:neighbor->improperlist");
@ -1221,6 +1160,7 @@ void NeighBondKokkos<DeviceType>::update_class_variables()
k_map_array.template sync<DeviceType>();
} else if (map_style == Atom::MAP_HASH) {
k_map_hash = atomKK->k_map_hash;
k_map_hash.template sync<DeviceType>();
}
}

View File

@ -115,11 +115,10 @@ class NeighBondKokkos : protected Pointers {
typename AT::t_tagint_2d improper_atom1,improper_atom2,
improper_atom3,improper_atom4;
DAT::tdual_int_scalar k_nlist;
typename AT::t_int_1d d_scalars;
HAT::t_int_1d h_scalars;
typename AT::t_int_scalar d_nlist;
HAT::t_int_scalar h_nlist;
DAT::tdual_int_scalar k_fail_flag;
typename AT::t_int_scalar d_fail_flag;
HAT::t_int_scalar h_fail_flag;

View File

@ -137,6 +137,12 @@ struct PairComputeFunctor {
F_FLOAT fytmp = 0.0;
F_FLOAT fztmp = 0.0;
if (NEIGHFLAG == FULL) {
f(i,0) = 0.0;
f(i,1) = 0.0;
f(i,2) = 0.0;
}
for (int jj = 0; jj < jnum; jj++) {
int j = neighbors_i(jj);
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
@ -205,6 +211,12 @@ struct PairComputeFunctor {
F_FLOAT fytmp = 0.0;
F_FLOAT fztmp = 0.0;
if (NEIGHFLAG == FULL) {
f(i,0) = 0.0;
f(i,1) = 0.0;
f(i,2) = 0.0;
}
for (int jj = 0; jj < jnum; jj++) {
int j = neighbors_i(jj);
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
@ -767,7 +779,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename std::enable_if<(NEIG
fpair->lmp->kokkos->neigh_thread = 1;
if (fpair->lmp->kokkos->neigh_thread) {
fpair->fuse_force_clear_flag = 1;
int vector_length = 8;
int atoms_per_team = 32;
@ -805,6 +816,7 @@ template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
EV_FLOAT ev;
if (fpair->neighflag == FULL) {
fpair->fuse_force_clear_flag = 1;
ev = pair_compute_neighlist<PairStyle,FULL,Specialisation> (fpair,list);
} else if (fpair->neighflag == HALFTHREAD) {
ev = pair_compute_neighlist<PairStyle,HALFTHREAD,Specialisation> (fpair,list);
@ -860,11 +872,7 @@ void pair_virial_fdotr_compute(PairStyle* fpair) {
fpair->virial[5] = virial.v[5];
}
}
#endif
#endif

View File

@ -383,7 +383,7 @@ class Atom : protected Pointers {
// map lookup function inlined for efficiency
// return -1 if no map defined
inline int map(tagint global)
virtual inline int map(tagint global)
{
if (map_style == 1)
return map_array[global];
@ -396,10 +396,10 @@ class Atom : protected Pointers {
virtual void map_init(int check = 1);
virtual void map_clear();
virtual void map_set();
void map_one(tagint, int);
virtual void map_one(tagint, int);
int map_style_set();
virtual void map_delete();
int map_find_hash(tagint);
virtual int map_find_hash(tagint);
protected:
// global to local ID mapping

View File

@ -215,7 +215,7 @@ int FixLangevin::setmask()
if (gjfflag) mask |= INITIAL_INTEGRATE;
mask |= POST_FORCE;
mask |= POST_FORCE_RESPA;
mask |= END_OF_STEP;
if (tallyflag || gjfflag) mask |= END_OF_STEP;
return mask;
}
@ -915,8 +915,6 @@ void FixLangevin::angmom_thermostat()
void FixLangevin::end_of_step()
{
if (!tallyflag && !gjfflag) return;
double **v = atom->v;
int *mask = atom->mask;
int nlocal = atom->nlocal;