Merge pull request #3769 from stanmoore1/kk_optimize
Kokkos package optimizations
This commit is contained in:
@ -150,20 +150,26 @@ void AngleCosineKokkos<DeviceType>::operator()(TagAngleCosineCompute<NEWTON_BOND
|
||||
const int i3 = anglelist(n,2);
|
||||
const int type = anglelist(n,3);
|
||||
|
||||
const F_FLOAT k = d_k[type];
|
||||
|
||||
const F_FLOAT x20 = x(i2,0);
|
||||
const F_FLOAT x21 = x(i2,1);
|
||||
const F_FLOAT x22 = x(i2,2);
|
||||
|
||||
// 1st bond
|
||||
|
||||
const F_FLOAT delx1 = x(i1,0) - x(i2,0);
|
||||
const F_FLOAT dely1 = x(i1,1) - x(i2,1);
|
||||
const F_FLOAT delz1 = x(i1,2) - x(i2,2);
|
||||
const F_FLOAT delx1 = x(i1,0) - x20;
|
||||
const F_FLOAT dely1 = x(i1,1) - x21;
|
||||
const F_FLOAT delz1 = x(i1,2) - x22;
|
||||
|
||||
const F_FLOAT rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
|
||||
const F_FLOAT r1 = sqrt(rsq1);
|
||||
|
||||
// 2nd bond
|
||||
|
||||
const F_FLOAT delx2 = x(i3,0) - x(i2,0);
|
||||
const F_FLOAT dely2 = x(i3,1) - x(i2,1);
|
||||
const F_FLOAT delz2 = x(i3,2) - x(i2,2);
|
||||
const F_FLOAT delx2 = x(i3,0) - x20;
|
||||
const F_FLOAT dely2 = x(i3,1) - x21;
|
||||
const F_FLOAT delz2 = x(i3,2) - x22;
|
||||
|
||||
const F_FLOAT rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
|
||||
const F_FLOAT r2 = sqrt(rsq2);
|
||||
@ -178,9 +184,9 @@ void AngleCosineKokkos<DeviceType>::operator()(TagAngleCosineCompute<NEWTON_BOND
|
||||
// force & energy
|
||||
|
||||
F_FLOAT eangle = 0.0;
|
||||
if (eflag) eangle = d_k[type]*(1.0+c);
|
||||
if (eflag) eangle = k*(1.0+c);
|
||||
|
||||
const F_FLOAT a = d_k[type];
|
||||
const F_FLOAT a = k;
|
||||
const F_FLOAT a11 = a*c / rsq1;
|
||||
const F_FLOAT a12 = -a / (r1*r2);
|
||||
const F_FLOAT a22 = a*c / rsq2;
|
||||
|
||||
@ -26,16 +26,24 @@
|
||||
#include "modify.h"
|
||||
#include "fix.h"
|
||||
|
||||
#include <Kokkos_Sort.hpp>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp)
{
  // no AtomVecKokkos instance is attached at construction time

  avecKK = nullptr;

  // scalar error flag with a device and a host copy; allocated exactly once

  k_error_flag = DAT::tdual_int_scalar("atom:error_flag");

  // two-element tag buffers, one on the device and one on the host
  // Kokkos::NoInit skips the initial fill, so the contents are undefined
  // until something writes them

  d_tag_min_max = t_tagint_2(Kokkos::NoInit("atom:tag_min_max"));
  h_tag_min_max = t_host_tagint_2(Kokkos::NoInit("atom:tag_min_max"));

  // scalar aliases into the buffers: element 0 = min, element 1 = max

  d_tag_min = Kokkos::subview(d_tag_min_max,0);
  d_tag_max = Kokkos::subview(d_tag_min_max,1);

  h_tag_min = Kokkos::subview(h_tag_min_max,0);
  h_tag_max = Kokkos::subview(h_tag_min_max,1);
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -15,6 +15,8 @@
|
||||
#include "atom.h" // IWYU pragma: export
|
||||
#include "kokkos_type.h"
|
||||
|
||||
#include <Kokkos_Sort.hpp>
|
||||
|
||||
#ifndef LMP_ATOM_KOKKOS_H
|
||||
#define LMP_ATOM_KOKKOS_H
|
||||
|
||||
@ -73,19 +75,50 @@ class AtomKokkos : public Atom {
|
||||
~AtomKokkos() override;
|
||||
|
||||
void map_init(int check = 1) override;
|
||||
void map_clear() override;
|
||||
void map_set() override;
|
||||
void map_one(tagint, int) override;
|
||||
void map_delete() override;
|
||||
int map_find_hash(tagint) override;
|
||||
|
||||
DAT::tdual_int_scalar k_error_flag;
|
||||
DAT::tdual_int_1d k_sametag;
|
||||
DAT::tdual_int_1d k_map_array;
|
||||
DAT::tdual_int_scalar k_error_flag;
|
||||
dual_hash_type k_map_hash;
|
||||
|
||||
DAT::t_tagint_1d d_tag_sorted;
|
||||
DAT::t_int_1d d_i_sorted;
|
||||
|
||||
typedef Kokkos::DualView<tagint[2], LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_2;
|
||||
typedef tdual_tagint_2::t_dev t_tagint_2;
|
||||
typedef tdual_tagint_2::t_host t_host_tagint_2;
|
||||
|
||||
t_tagint_2 d_tag_min_max;
|
||||
t_host_tagint_2 h_tag_min_max;
|
||||
|
||||
DAT::t_tagint_scalar d_tag_min,d_tag_max;
|
||||
HAT::t_tagint_scalar h_tag_min,h_tag_max;
|
||||
|
||||
using MapKeyViewType = decltype(d_tag_sorted);
|
||||
using BinOpMap = Kokkos::BinOp1D<MapKeyViewType>;
|
||||
Kokkos::BinSort<MapKeyViewType, BinOpMap> Sorter;
|
||||
|
||||
class AtomVecKokkos* avecKK;
|
||||
|
||||
// map lookup function inlined for efficiency
|
||||
// return -1 if no map defined
|
||||
|
||||
inline int map(tagint global) override
{
  switch (map_style) {
    case 1:
      // array style: bring the host copy up to date, then index it by global ID
      k_map_array.sync_host();
      return map_array[global];
    case 2:
      // hash style: the lookup is delegated to map_find_hash()
      return map_find_hash(global);
    default:
      // any other style: no map is defined
      return -1;
  }
};
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static int map_kokkos(tagint global, int map_style, const DAT::tdual_int_1d &k_map_array, const dual_hash_type &k_map_hash)
|
||||
|
||||
@ -34,8 +34,6 @@ using namespace LAMMPS_NS;
|
||||
set entire array to -1 as initial values
|
||||
for hash option:
|
||||
map_nhash = length of hash table
|
||||
map_nbucket = # of hash buckets, prime larger than map_nhash * 2
|
||||
so buckets will only be filled with 0 or 1 atoms on average
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::map_init(int check)
|
||||
@ -58,15 +56,7 @@ void AtomKokkos::map_init(int check)
|
||||
// for hash, set all buckets to empty, put all entries in free list
|
||||
|
||||
if (!recreate) {
|
||||
if (map_style == MAP_ARRAY) {
|
||||
for (int i = 0; i <= map_tag_max; i++) map_array[i] = -1;
|
||||
} else {
|
||||
for (int i = 0; i < map_nbucket; i++) map_bucket[i] = -1;
|
||||
map_nused = 0;
|
||||
map_free = 0;
|
||||
for (int i = 0; i < map_nhash; i++) map_hash[i].next = i + 1;
|
||||
if (map_nhash > 0) map_hash[map_nhash - 1].next = -1;
|
||||
}
|
||||
map_clear();
|
||||
|
||||
// recreating: delete old map and create new one for array or hash
|
||||
|
||||
@ -76,7 +66,8 @@ void AtomKokkos::map_init(int check)
|
||||
if (map_style == MAP_ARRAY) {
|
||||
map_maxarray = map_tag_max;
|
||||
memoryKK->create_kokkos(k_map_array, map_array, map_maxarray + 1, "atom:map_array");
|
||||
for (int i = 0; i <= map_tag_max; i++) map_array[i] = -1;
|
||||
Kokkos::deep_copy(k_map_array.d_view,-1);
|
||||
k_map_array.modify_device();
|
||||
|
||||
} else {
|
||||
|
||||
@ -90,35 +81,26 @@ void AtomKokkos::map_init(int check)
|
||||
map_nhash *= 2;
|
||||
map_nhash = MAX(map_nhash, 1000);
|
||||
|
||||
// map_nbucket = prime just larger than map_nhash
|
||||
// next_prime() should be fast enough,
|
||||
// about 10% of odd integers are prime above 1M
|
||||
|
||||
map_nbucket = next_prime(map_nhash);
|
||||
|
||||
// set all buckets to empty
|
||||
// set hash to map_nhash in length
|
||||
// put all hash entries in free list and point them to each other
|
||||
|
||||
map_bucket = new int[map_nbucket];
|
||||
for (int i = 0; i < map_nbucket; i++) map_bucket[i] = -1;
|
||||
|
||||
map_hash = new HashElem[map_nhash];
|
||||
map_nused = 0;
|
||||
map_free = 0;
|
||||
for (int i = 0; i < map_nhash; i++) map_hash[i].next = i + 1;
|
||||
map_hash[map_nhash - 1].next = -1;
|
||||
|
||||
// use "view" template method to avoid unnecessary deep_copy
|
||||
|
||||
auto h_map_hash = k_map_hash.view<LMPHostType>(); // get type
|
||||
h_map_hash = decltype(h_map_hash)(map_nhash);
|
||||
k_map_hash.view<LMPHostType>() = h_map_hash;
|
||||
k_map_hash = dual_hash_type(map_nhash);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
k_sametag.modify_host();
|
||||
if (map_style == Atom::MAP_ARRAY) k_map_array.modify_host();
|
||||
/* ----------------------------------------------------------------------
|
||||
clear global -> local map for all of my own and ghost atoms
|
||||
for hash table option:
|
||||
global ID may not be in table if image atom was already cleared
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::map_clear()
|
||||
{
|
||||
if (map_style == Atom::MAP_ARRAY) {
|
||||
Kokkos::deep_copy(k_map_array.d_view,-1);
|
||||
k_map_array.modify_device();
|
||||
} else {
|
||||
k_map_hash.d_view.clear();
|
||||
k_map_hash.modify_device();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -135,28 +117,16 @@ void AtomKokkos::map_set()
|
||||
{
|
||||
int nall = nlocal + nghost;
|
||||
|
||||
atomKK->sync(Host, TAG_MASK);
|
||||
// possible reallocation of sametag must come before loop over atoms
|
||||
// since loop sets sametag
|
||||
|
||||
k_sametag.sync_host();
|
||||
if (map_style == Atom::MAP_ARRAY) k_map_array.sync_host();
|
||||
if (nall > max_same) {
|
||||
max_same = nall + EXTRA;
|
||||
memoryKK->destroy_kokkos(k_sametag, sametag);
|
||||
memoryKK->create_kokkos(k_sametag, sametag, max_same, "atom:sametag");
|
||||
}
|
||||
|
||||
if (map_style == MAP_ARRAY) {
|
||||
|
||||
// possible reallocation of sametag must come before loop over atoms
|
||||
// since loop sets sametag
|
||||
|
||||
if (nall > max_same) {
|
||||
max_same = nall + EXTRA;
|
||||
memoryKK->destroy_kokkos(k_sametag, sametag);
|
||||
memoryKK->create_kokkos(k_sametag, sametag, max_same, "atom:sametag");
|
||||
}
|
||||
|
||||
for (int i = nall - 1; i >= 0; i--) {
|
||||
sametag[i] = map_array[tag[i]];
|
||||
map_array[tag[i]] = i;
|
||||
}
|
||||
|
||||
} else {
|
||||
if (map_style == MAP_HASH) {
|
||||
|
||||
// if this proc has more atoms than hash table size, call map_init()
|
||||
// call with 0 since max atomID in system has not changed
|
||||
@ -164,107 +134,191 @@ void AtomKokkos::map_set()
|
||||
// b/c map_init() may invoke map_delete(), whacking sametag
|
||||
|
||||
if (nall > map_nhash) map_init(0);
|
||||
if (nall > max_same) {
|
||||
max_same = nall + EXTRA;
|
||||
memoryKK->destroy_kokkos(k_sametag, sametag);
|
||||
memoryKK->create_kokkos(k_sametag, sametag, max_same, "atom:sametag");
|
||||
}
|
||||
|
||||
int previous, ibucket, index;
|
||||
tagint global;
|
||||
|
||||
for (int i = nall - 1; i >= 0; i--) {
|
||||
sametag[i] = map_find_hash(tag[i]);
|
||||
|
||||
// search for key
|
||||
// if found it, just overwrite local value with index
|
||||
|
||||
previous = -1;
|
||||
global = tag[i];
|
||||
ibucket = global % map_nbucket;
|
||||
index = map_bucket[ibucket];
|
||||
while (index > -1) {
|
||||
if (map_hash[index].global == global) break;
|
||||
previous = index;
|
||||
index = map_hash[index].next;
|
||||
}
|
||||
if (index > -1) {
|
||||
map_hash[index].local = i;
|
||||
continue;
|
||||
}
|
||||
|
||||
// take one entry from free list
|
||||
// add the new global/local pair as entry at end of bucket list
|
||||
// special logic if this entry is 1st in bucket
|
||||
|
||||
index = map_free;
|
||||
map_free = map_hash[map_free].next;
|
||||
if (previous == -1)
|
||||
map_bucket[ibucket] = index;
|
||||
else
|
||||
map_hash[previous].next = index;
|
||||
map_hash[index].global = global;
|
||||
map_hash[index].local = i;
|
||||
map_hash[index].next = -1;
|
||||
map_nused++;
|
||||
}
|
||||
|
||||
// Copy to Kokkos hash
|
||||
|
||||
// use "view" template method to avoid unnecessary deep_copy
|
||||
|
||||
auto h_map_hash = k_map_hash.view<LMPHostType>();
|
||||
h_map_hash.clear();
|
||||
|
||||
for (int i = nall - 1; i >= 0; i--) {
|
||||
|
||||
// search for key
|
||||
// if don't find it, done
|
||||
|
||||
previous = -1;
|
||||
global = tag[i];
|
||||
ibucket = global % map_nbucket;
|
||||
index = map_bucket[ibucket];
|
||||
while (index > -1) {
|
||||
if (map_hash[index].global == global) break;
|
||||
previous = index;
|
||||
index = map_hash[index].next;
|
||||
}
|
||||
if (index == -1) continue;
|
||||
|
||||
int local = map_hash[index].local;
|
||||
|
||||
auto insert_result = h_map_hash.insert(global, local);
|
||||
if (insert_result.failed()) error->one(FLERR, "Kokkos::UnorderedMap insertion failed");
|
||||
}
|
||||
}
|
||||
|
||||
k_sametag.modify_host();
|
||||
if (map_style == Atom::MAP_ARRAY)
|
||||
k_map_array.modify_host();
|
||||
else if (map_style == Atom::MAP_HASH) {
|
||||
atomKK->sync(Device, TAG_MASK);
|
||||
|
||||
// use "view" template method to avoid unnecessary deep_copy
|
||||
auto d_tag = atomKK->k_tag.d_view;
|
||||
auto d_sametag = k_sametag.d_view;
|
||||
|
||||
auto h_map_hash = k_map_hash.view<LMPHostType>();
|
||||
auto d_map_hash = k_map_hash.view<LMPDeviceType>();
|
||||
// sort by tag
|
||||
|
||||
// check if fix shake or neigh bond needs a device hash
|
||||
int nmax = atom->nmax;
|
||||
|
||||
int device_hash_flag = 0;
|
||||
|
||||
auto neighborKK = (NeighborKokkos *) neighbor;
|
||||
if (neighborKK->device_flag) device_hash_flag = 1;
|
||||
|
||||
for (int n = 0; n < modify->nfix; n++)
|
||||
if (utils::strmatch(modify->fix[n]->style, "^shake"))
|
||||
if (modify->fix[n]->execution_space == Device) device_hash_flag = 1;
|
||||
|
||||
if (device_hash_flag) {
|
||||
Kokkos::deep_copy(d_map_hash, h_map_hash);
|
||||
k_map_hash.view<LMPDeviceType>() = d_map_hash;
|
||||
}
|
||||
int realloc_flag = 0;
|
||||
if (d_tag_sorted.extent(0) < nmax) {
|
||||
MemKK::realloc_kokkos(d_tag_sorted,"atom:tag_sorted",nmax);
|
||||
MemKK::realloc_kokkos(d_i_sorted,"atom:i_sorted",nmax);
|
||||
realloc_flag = 1;
|
||||
}
|
||||
|
||||
h_tag_min() = MAXTAGINT;
|
||||
h_tag_max() = 0;
|
||||
|
||||
Kokkos::deep_copy(d_tag_min_max,h_tag_min_max);
|
||||
|
||||
auto l_tag_sorted = d_tag_sorted;
|
||||
auto l_i_sorted = d_i_sorted;
|
||||
auto l_tag_min = d_tag_min;
|
||||
auto l_tag_max = d_tag_max;
|
||||
int map_style_array = (map_style == MAP_ARRAY);
|
||||
|
||||
Kokkos::parallel_for(nall, LAMMPS_LAMBDA(int i) {
|
||||
l_i_sorted(i) = i;
|
||||
tagint tag_i = d_tag(i);
|
||||
l_tag_sorted(i) = tag_i;
|
||||
Kokkos::atomic_min(&l_tag_min(),tag_i);
|
||||
Kokkos::atomic_max(&l_tag_max(),tag_i);
|
||||
});
|
||||
|
||||
Kokkos::deep_copy(h_tag_min_max,d_tag_min_max);
|
||||
|
||||
tagint min = h_tag_min();
|
||||
tagint max = h_tag_max();
|
||||
|
||||
using MapKeyViewType = decltype(d_tag_sorted);
|
||||
using BinOpMap = Kokkos::BinOp1D<MapKeyViewType>;
|
||||
|
||||
auto binner = BinOpMap(nall, min, max);
|
||||
|
||||
if (!Sorter.bin_offsets.data() || realloc_flag) {
|
||||
Sorter = Kokkos::BinSort<MapKeyViewType, BinOpMap>(d_tag_sorted, 0, nall, binner, true);
|
||||
MemKK::realloc_kokkos(Sorter.bin_count_atomic,"Kokkos::SortImpl::BinSortFunctor::bin_count",nmax+1);
|
||||
Kokkos::deep_copy(Sorter.bin_count_atomic,0);
|
||||
Sorter.bin_count_const = Sorter.bin_count_atomic;
|
||||
MemKK::realloc_kokkos(Sorter.bin_offsets,"Kokkos::SortImpl::BinSortFunctor::bin_offsets",nmax+1);
|
||||
MemKK::realloc_kokkos(Sorter.sort_order,"Kokkos::SortImpl::BinSortFunctor::sort_order",nmax);
|
||||
} else {
|
||||
Kokkos::deep_copy(Sorter.bin_count_atomic,0);
|
||||
Sorter.bin_op = binner;
|
||||
Sorter.range_begin = 0;
|
||||
Sorter.range_end = nall;
|
||||
}
|
||||
|
||||
Sorter.create_permute_vector(LMPDeviceType());
|
||||
Sorter.sort(LMPDeviceType(), d_tag_sorted, 0, nall);
|
||||
Sorter.sort(LMPDeviceType(), d_i_sorted, 0, nall);
|
||||
|
||||
auto d_map_array = k_map_array.d_view;
|
||||
auto d_map_hash = k_map_hash.d_view;
|
||||
d_map_hash.clear();
|
||||
|
||||
auto d_error_flag = k_error_flag.d_view;
|
||||
Kokkos::deep_copy(d_error_flag,0);
|
||||
|
||||
// for each tag find:
|
||||
// neighboring atoms with closest local id for sametag
|
||||
// atom with smallest local id for atom map
|
||||
|
||||
Kokkos::parallel_for(nall, LAMMPS_LAMBDA(int ii) {
|
||||
const int i = l_i_sorted(ii);
|
||||
const tagint tag_i = l_tag_sorted(ii);
|
||||
|
||||
int i_min = i;
|
||||
int i_closest = MAXTAGINT;
|
||||
|
||||
// search atoms with same tag in the forward direction
|
||||
|
||||
int jj = ii+1;
|
||||
int closest_flag = 0;
|
||||
|
||||
while (jj < nall) {
|
||||
const tagint tag_j = l_tag_sorted(jj);
|
||||
if (tag_j != tag_i) break;
|
||||
const int j = l_i_sorted(jj);
|
||||
i_min = MIN(i_min,j);
|
||||
if (j > i) {
|
||||
i_closest = MIN(i_closest,j);
|
||||
closest_flag = 1;
|
||||
}
|
||||
jj++;
|
||||
}
|
||||
|
||||
// search atoms with same tag in the reverse direction
|
||||
|
||||
jj = ii-1;
|
||||
|
||||
while (jj >= 0) {
|
||||
const tagint tag_j = l_tag_sorted(jj);
|
||||
if (tag_j != tag_i) break;
|
||||
const int j = l_i_sorted(jj);
|
||||
i_min = MIN(i_min,j);
|
||||
if (j > i) {
|
||||
i_closest = MIN(i_closest,j);
|
||||
closest_flag = 1;
|
||||
}
|
||||
jj--;
|
||||
}
|
||||
|
||||
if (!closest_flag)
|
||||
i_closest = -1;
|
||||
|
||||
d_sametag(i) = i_closest;
|
||||
|
||||
if (i == i_min) {
|
||||
if (map_style_array)
|
||||
d_map_array(tag_i) = i_min;
|
||||
else {
|
||||
auto insert_result = d_map_hash.insert(tag_i, i_min);
|
||||
if (insert_result.failed()) d_error_flag() = 1;
|
||||
}
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
auto h_error_flag = k_error_flag.h_view;
|
||||
Kokkos::deep_copy(h_error_flag,d_error_flag);
|
||||
|
||||
if (h_error_flag())
|
||||
error->one(FLERR,"Failed to insert into Kokkos hash atom map");
|
||||
|
||||
k_sametag.modify_device();
|
||||
|
||||
if (map_style == MAP_ARRAY)
|
||||
k_map_array.modify_device();
|
||||
else
|
||||
k_map_hash.modify_device();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set global to local map for one atom
|
||||
for hash table option:
|
||||
global ID may already be in table if atom was already set
|
||||
called by Special class
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::map_one(tagint global, int local)
{
  if (map_style != MAP_ARRAY) {

    // hash path: sync the host table first, then insert into it

    k_map_hash.sync_host();
    auto& host_table = k_map_hash.h_view;

    auto outcome = host_table.insert(global, local);
    if (outcome.existing()) {

      // key already stored: overwrite the value held for it

      host_table.value_at(host_table.find(global)) = local;
    } else if (outcome.failed()) {
      error->one(FLERR,"Failed to insert into Kokkos hash atom map");
    }
    return;
  }

  // array path: sync the host copy, then store the local index at the global ID

  k_map_array.sync_host();
  k_map_array.h_view[global] = local;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
lookup global ID in hash table, return local index
|
||||
called by map() in atom.h
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int AtomKokkos::map_find_hash(tagint global)
{
  // the lookup runs on the host table, so pull in pending device changes first

  k_map_hash.sync_host();
  auto& host_table = k_map_hash.h_view;

  // find() yields a slot index; valid_at() tells whether that slot holds an entry
  // return -1 when the global ID is not in the table

  auto slot = host_table.find(global);
  return host_table.valid_at(slot) ? host_table.value_at(slot) : -1;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -279,10 +333,6 @@ void AtomKokkos::map_delete()
|
||||
if (map_style == MAP_ARRAY) {
|
||||
memoryKK->destroy_kokkos(k_map_array, map_array);
|
||||
map_array = nullptr;
|
||||
} else {
|
||||
k_map_hash.h_view = host_hash_type();
|
||||
k_map_hash.d_view = hash_type();
|
||||
}
|
||||
|
||||
Atom::map_delete();
|
||||
} else
|
||||
k_map_hash = dual_hash_type();
|
||||
}
|
||||
|
||||
@ -35,7 +35,7 @@ AtomVecKokkos::AtomVecKokkos(LAMMPS *lmp) : AtomVec(lmp)
|
||||
unpack_exchange_indices_flag = 0;
|
||||
size_exchange = 0;
|
||||
|
||||
k_count = DAT::tdual_int_1d("atom::k_count",1);
|
||||
k_count = DAT::tdual_int_1d("atom:k_count",1);
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
commKK = (CommKokkos *) comm;
|
||||
}
|
||||
|
||||
@ -43,13 +43,8 @@ BondFENEKokkos<DeviceType>::BondFENEKokkos(LAMMPS *lmp) : BondFENE(lmp)
|
||||
datamask_read = X_MASK | F_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
|
||||
|
||||
k_warning_flag = DAT::tdual_int_scalar("Bond:warning_flag");
|
||||
d_warning_flag = k_warning_flag.view<DeviceType>();
|
||||
h_warning_flag = k_warning_flag.h_view;
|
||||
|
||||
k_error_flag = DAT::tdual_int_scalar("Bond:error_flag");
|
||||
d_error_flag = k_error_flag.view<DeviceType>();
|
||||
h_error_flag = k_error_flag.h_view;
|
||||
d_flag = typename AT::t_int_scalar("bond:flag");
|
||||
h_flag = HAT::t_int_scalar("bond:flag_mirror");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -99,13 +94,7 @@ void BondFENEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
nlocal = atom->nlocal;
|
||||
newton_bond = force->newton_bond;
|
||||
|
||||
h_warning_flag() = 0;
|
||||
k_warning_flag.template modify<LMPHostType>();
|
||||
k_warning_flag.template sync<DeviceType>();
|
||||
|
||||
h_error_flag() = 0;
|
||||
k_error_flag.template modify<LMPHostType>();
|
||||
k_error_flag.template sync<DeviceType>();
|
||||
Kokkos::deep_copy(d_flag,0);
|
||||
|
||||
copymode = 1;
|
||||
|
||||
@ -127,14 +116,11 @@ void BondFENEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
}
|
||||
}
|
||||
|
||||
k_warning_flag.template modify<DeviceType>();
|
||||
k_warning_flag.template sync<LMPHostType>();
|
||||
if (h_warning_flag())
|
||||
error->warning(FLERR,"FENE bond too long");
|
||||
Kokkos::deep_copy(h_flag,d_flag);
|
||||
|
||||
k_error_flag.template modify<DeviceType>();
|
||||
k_error_flag.template sync<LMPHostType>();
|
||||
if (h_error_flag())
|
||||
if (h_flag() == 1)
|
||||
error->warning(FLERR,"FENE bond too long");
|
||||
else if (h_flag() == 2)
|
||||
error->one(FLERR,"Bad FENE bond");
|
||||
|
||||
if (eflag_global) energy += ev.evdwl;
|
||||
@ -165,8 +151,6 @@ template<int NEWTON_BOND, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void BondFENEKokkos<DeviceType>::operator()(TagBondFENECompute<NEWTON_BOND,EVFLAG>, const int &n, EV_FLOAT& ev) const {
|
||||
|
||||
if (d_error_flag()) return;
|
||||
|
||||
// The f array is atomic
|
||||
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > a_f = f;
|
||||
|
||||
@ -178,10 +162,15 @@ void BondFENEKokkos<DeviceType>::operator()(TagBondFENECompute<NEWTON_BOND,EVFLA
|
||||
const F_FLOAT dely = x(i1,1) - x(i2,1);
|
||||
const F_FLOAT delz = x(i1,2) - x(i2,2);
|
||||
|
||||
const F_FLOAT r0 = d_r0[type];
|
||||
const F_FLOAT k = d_k[type];
|
||||
const F_FLOAT sigma = d_sigma[type];
|
||||
const F_FLOAT epsilon = d_epsilon[type];
|
||||
|
||||
// force from log term
|
||||
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
const F_FLOAT r0sq = d_r0[type] * d_r0[type];
|
||||
const F_FLOAT r0sq = r0 * r0;
|
||||
F_FLOAT rlogarg = 1.0 - rsq/r0sq;
|
||||
|
||||
// if r -> r0, then rlogarg < 0.0 which is an error
|
||||
@ -189,31 +178,32 @@ void BondFENEKokkos<DeviceType>::operator()(TagBondFENECompute<NEWTON_BOND,EVFLA
|
||||
// if r > 2*r0 something serious is wrong, abort
|
||||
|
||||
if (rlogarg < 0.1) {
|
||||
if (!d_warning_flag())
|
||||
d_warning_flag() = 1;
|
||||
if (rlogarg <= -3.0 && !d_error_flag())
|
||||
d_error_flag() = 1;
|
||||
if (rlogarg <= -3.0)
|
||||
d_flag() = 2;
|
||||
else
|
||||
d_flag() = 1;
|
||||
rlogarg = 0.1;
|
||||
}
|
||||
|
||||
F_FLOAT fbond = -d_k[type]/rlogarg;
|
||||
F_FLOAT fbond = -k/rlogarg;
|
||||
|
||||
// force from LJ term
|
||||
|
||||
F_FLOAT sr6 = 0.0;
|
||||
if (rsq < MY_CUBEROOT2*d_sigma[type]*d_sigma[type]) {
|
||||
const F_FLOAT sr2 = d_sigma[type]*d_sigma[type]/rsq;
|
||||
F_FLOAT sigma2 = sigma*sigma;
|
||||
if (rsq < MY_CUBEROOT2*sigma2) {
|
||||
const F_FLOAT sr2 = sigma2/rsq;
|
||||
sr6 = sr2*sr2*sr2;
|
||||
fbond += 48.0*d_epsilon[type]*sr6*(sr6-0.5)/rsq;
|
||||
fbond += 48.0*epsilon*sr6*(sr6-0.5)/rsq;
|
||||
}
|
||||
|
||||
// energy
|
||||
|
||||
F_FLOAT ebond = 0.0;
|
||||
if (eflag) {
|
||||
ebond = -0.5 * d_k[type]*r0sq*log(rlogarg);
|
||||
if (rsq < MY_CUBEROOT2*d_sigma[type]*d_sigma[type])
|
||||
ebond += 4.0*d_epsilon[type]*sr6*(sr6-1.0) + d_epsilon[type];
|
||||
ebond = -0.5 * k*r0sq*log(rlogarg);
|
||||
if (rsq < MY_CUBEROOT2*sigma2)
|
||||
ebond += 4.0*epsilon*sr6*(sr6-1.0) + epsilon;
|
||||
}
|
||||
|
||||
// apply force to each of 2 atoms
|
||||
|
||||
@ -71,13 +71,8 @@ class BondFENEKokkos : public BondFENE {
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
|
||||
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
|
||||
|
||||
DAT::tdual_int_scalar k_warning_flag;
|
||||
typename AT::t_int_scalar d_warning_flag;
|
||||
HAT::t_int_scalar h_warning_flag;
|
||||
|
||||
DAT::tdual_int_scalar k_error_flag;
|
||||
typename AT::t_int_scalar d_error_flag;
|
||||
HAT::t_int_scalar h_error_flag;
|
||||
typename AT::t_int_scalar d_flag;
|
||||
HAT::t_int_scalar h_flag;
|
||||
|
||||
int nlocal,newton_bond;
|
||||
int eflag,vflag;
|
||||
|
||||
@ -729,13 +729,8 @@ void CommKokkos::exchange_device()
|
||||
double lo,hi;
|
||||
MPI_Request request;
|
||||
|
||||
// clear global->local map for owned and ghost atoms
|
||||
// b/c atoms migrate to new procs in exchange() and
|
||||
// new ghosts are created in borders()
|
||||
// map_set() is done at end of borders()
|
||||
// clear ghost count and any ghost bonus data internal to AtomVec
|
||||
|
||||
if (map_style != Atom::MAP_NONE) atom->map_clear();
|
||||
atom->nghost = 0;
|
||||
atom->avec->clear_bonus();
|
||||
|
||||
@ -1275,10 +1270,8 @@ void CommKokkos::borders_device() {
|
||||
|
||||
// reset global->local map
|
||||
|
||||
if (map_style != Atom::MAP_NONE) {
|
||||
atomKK->sync(Host,TAG_MASK);
|
||||
if (map_style != Atom::MAP_NONE)
|
||||
atom->map_set();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
||||
@ -571,9 +571,11 @@ void DomainKokkos::lamda2x(int n)
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
void DomainKokkos::operator()(TagDomain_lamda2x, const int &i) const {
  // per-atom body for the TagDomain_lamda2x tag: rewrites x(i,0..2) in place
  // from the h[] coefficients and the boxlo[] offsets
  // the transform must be applied exactly once per atom

  // load components 1 and 2 once; both are inputs to several of the
  // expressions below and are themselves overwritten by this function

  const double xi1 = x(i,1);
  const double xi2 = x(i,2);
  x(i,0) = h[0]*x(i,0) + h[5]*xi1 + h[4]*xi2 + boxlo[0];
  x(i,1) = h[1]*xi1 + h[3]*xi2 + boxlo[1];
  x(i,2) = h[2]*xi2 + boxlo[2];
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
||||
@ -684,8 +684,6 @@ void FixLangevinKokkos<DeviceType>::zero_force_item(int i) const
|
||||
template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::compute_target()
|
||||
{
|
||||
atomKK->sync(Host, MASK_MASK);
|
||||
mask = atomKK->k_mask.template view<DeviceType>();
|
||||
int nlocal = atomKK->nlocal;
|
||||
|
||||
double delta = update->ntimestep - update->beginstep;
|
||||
@ -710,12 +708,14 @@ void FixLangevinKokkos<DeviceType>::compute_target()
|
||||
memoryKK->destroy_kokkos(k_tforce,tforce);
|
||||
memoryKK->create_kokkos(k_tforce,tforce,maxatom2,"langevin:tforce");
|
||||
d_tforce = k_tforce.template view<DeviceType>();
|
||||
h_tforce = k_tforce.template view<LMPHostType>();
|
||||
h_tforce = k_tforce.h_view;
|
||||
}
|
||||
input->variable->compute_atom(tvar,igroup,tforce,1,0); // tforce is modified on host
|
||||
k_tforce.template modify<LMPHostType>();
|
||||
k_tforce.modify_host();
|
||||
atomKK->sync(Host, MASK_MASK);
|
||||
auto h_mask = atomKK->k_mask.h_view;
|
||||
for (int i = 0; i < nlocal; i++)
|
||||
if (mask[i] & groupbit)
|
||||
if (h_mask[i] & groupbit)
|
||||
if (h_tforce[i] < 0.0)
|
||||
error->one(FLERR,
|
||||
"Fix langevin variable returned negative temperature");
|
||||
|
||||
@ -221,6 +221,7 @@ void FixShakeKokkos<DeviceType>::pre_neighbor()
|
||||
k_map_array.template sync<DeviceType>();
|
||||
} else if (map_style == Atom::MAP_HASH) {
|
||||
k_map_hash = atomKK->k_map_hash;
|
||||
k_map_hash.template sync<DeviceType>();
|
||||
}
|
||||
|
||||
k_shake_flag.sync<DeviceType>();
|
||||
@ -248,6 +249,7 @@ void FixShakeKokkos<DeviceType>::pre_neighbor()
|
||||
k_map_array.template sync<DeviceType>();
|
||||
} else if (map_style == Atom::MAP_HASH) {
|
||||
k_map_hash = atomKK->k_map_hash;
|
||||
k_map_hash.template sync<DeviceType>();
|
||||
}
|
||||
|
||||
atomKK->k_sametag.sync<DeviceType>();
|
||||
@ -357,6 +359,7 @@ void FixShakeKokkos<DeviceType>::post_force(int vflag)
|
||||
k_map_array.template sync<DeviceType>();
|
||||
} else if (map_style == Atom::MAP_HASH) {
|
||||
k_map_hash = atomKK->k_map_hash;
|
||||
k_map_hash.template sync<DeviceType>();
|
||||
}
|
||||
|
||||
if (d_rmass.data())
|
||||
|
||||
@ -570,6 +570,21 @@ struct dual_hash_type {
|
||||
hash_type d_view;
|
||||
host_hash_type h_view;
|
||||
|
||||
bool modified_device;
|
||||
bool modified_host;
|
||||
|
||||
dual_hash_type() {
|
||||
modified_device = modified_host = false;
|
||||
d_view = hash_type();
|
||||
h_view = host_hash_type();
|
||||
}
|
||||
|
||||
dual_hash_type(int capacity) {
|
||||
modified_device = modified_host = false;
|
||||
d_view = hash_type(capacity);
|
||||
h_view = host_hash_type(capacity);
|
||||
}
|
||||
|
||||
// return a reference to the device-side table (d_view)
// this overload is enabled when DeviceType is LMPDeviceType, or when
// Kokkos::SpaceAccessibility reports the host memory space as accessible
// from the device memory space
template<class DeviceType>
|
||||
std::enable_if_t<(std::is_same<DeviceType,LMPDeviceType>::value || Kokkos::SpaceAccessibility<LMPDeviceType::memory_space,LMPHostType::memory_space>::accessible),hash_type&> view() {return d_view;}
|
||||
|
||||
@ -584,6 +599,42 @@ struct dual_hash_type {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
std::enable_if_t<!(std::is_same<DeviceType,LMPDeviceType>::value || Kokkos::SpaceAccessibility<LMPDeviceType::memory_space,LMPHostType::memory_space>::accessible),const host_hash_type&> const_view() const {return h_view;}
|
||||
|
||||
void modify_device()
|
||||
{
|
||||
modified_device = true;
|
||||
if (modified_device && modified_host)
|
||||
Kokkos::abort("Concurrent modification of host and device hashes");
|
||||
}
|
||||
|
||||
void modify_host()
|
||||
{
|
||||
modified_host = true;
|
||||
if (modified_device && modified_host)
|
||||
Kokkos::abort("Concurrent modification of host and device hashes");
|
||||
}
|
||||
|
||||
void sync_device()
|
||||
{
|
||||
if (modified_host) {
|
||||
Kokkos::deep_copy(d_view,h_view);
|
||||
modified_host = false;
|
||||
}
|
||||
}
|
||||
|
||||
void sync_host()
|
||||
{
|
||||
if (modified_device) {
|
||||
Kokkos::deep_copy(h_view,d_view);
|
||||
modified_device = false;
|
||||
}
|
||||
}
|
||||
|
||||
// sync<DeviceType>() overload that forwards to sync_device()
// enabled when DeviceType is LMPDeviceType, or when Kokkos::SpaceAccessibility
// reports the host memory space as accessible from the device memory space
template<class DeviceType>
|
||||
std::enable_if_t<(std::is_same<DeviceType,LMPDeviceType>::value || Kokkos::SpaceAccessibility<LMPDeviceType::memory_space,LMPHostType::memory_space>::accessible),void> sync() {sync_device();}
|
||||
|
||||
// sync<DeviceType>() overload that forwards to sync_host()
// enabled in exactly the cases where the device-forwarding overload is not:
// DeviceType differs from LMPDeviceType and Kokkos::SpaceAccessibility does
// not report the host memory space as accessible from the device memory space
template<class DeviceType>
|
||||
std::enable_if_t<!(std::is_same<DeviceType,LMPDeviceType>::value || Kokkos::SpaceAccessibility<LMPDeviceType::memory_space,LMPHostType::memory_space>::accessible),void> sync() {sync_host();}
|
||||
|
||||
};
|
||||
|
||||
template <class DeviceType>
|
||||
@ -601,6 +652,13 @@ typedef tdual_int_scalar::t_dev_const t_int_scalar_const;
|
||||
typedef tdual_int_scalar::t_dev_um t_int_scalar_um;
|
||||
typedef tdual_int_scalar::t_dev_const_um t_int_scalar_const_um;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<LAMMPS_NS::tagint, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_scalar;
|
||||
typedef tdual_tagint_scalar::t_dev t_tagint_scalar;
|
||||
typedef tdual_tagint_scalar::t_dev_const t_tagint_scalar_const;
|
||||
typedef tdual_tagint_scalar::t_dev_um t_tagint_scalar_um;
|
||||
typedef tdual_tagint_scalar::t_dev_const_um t_tagint_scalar_const_um;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<LMP_FLOAT, LMPDeviceType::array_layout, LMPDeviceType>
|
||||
tdual_float_scalar;
|
||||
@ -919,6 +977,12 @@ typedef tdual_int_scalar::t_host_const t_int_scalar_const;
|
||||
typedef tdual_int_scalar::t_host_um t_int_scalar_um;
|
||||
typedef tdual_int_scalar::t_host_const_um t_int_scalar_const_um;
|
||||
|
||||
typedef Kokkos::DualView<LAMMPS_NS::tagint, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_scalar;
|
||||
typedef tdual_tagint_scalar::t_host t_tagint_scalar;
|
||||
typedef tdual_tagint_scalar::t_host_const t_tagint_scalar_const;
|
||||
typedef tdual_tagint_scalar::t_host_um t_tagint_scalar_um;
|
||||
typedef tdual_tagint_scalar::t_host_const_um t_tagint_scalar_const_um;
|
||||
|
||||
typedef Kokkos::DualView<LMP_FLOAT, LMPDeviceType::array_layout, LMPDeviceType> tdual_float_scalar;
|
||||
typedef tdual_float_scalar::t_host t_float_scalar;
|
||||
typedef tdual_float_scalar::t_host_const t_float_scalar_const;
|
||||
|
||||
@ -50,13 +50,16 @@ NeighBondKokkos<DeviceType>::NeighBondKokkos(LAMMPS *lmp) : Pointers(lmp)
|
||||
datamask_read = EMPTY_MASK;
|
||||
datamask_modify = EMPTY_MASK;
|
||||
|
||||
k_nlist = DAT::tdual_int_scalar("NeighBond:nlist");
|
||||
d_nlist = k_nlist.view<DeviceType>();
|
||||
h_nlist = k_nlist.h_view;
|
||||
// use 1D view for scalars to reduce GPU memory operations
|
||||
|
||||
k_fail_flag = DAT::tdual_int_scalar("NeighBond:fail_flag");
|
||||
d_fail_flag = k_fail_flag.view<DeviceType>();
|
||||
h_fail_flag = k_fail_flag.h_view;
|
||||
d_scalars = typename AT::t_int_1d("NeighBond:scalars",2);
|
||||
h_scalars = HAT::t_int_1d("NeighBond:scalars_mirror",2);
|
||||
|
||||
d_nlist = Kokkos::subview(d_scalars,0);
|
||||
d_fail_flag = Kokkos::subview(d_scalars,1);
|
||||
|
||||
h_nlist = Kokkos::subview(h_scalars,0);
|
||||
h_fail_flag = Kokkos::subview(h_scalars,1);
|
||||
|
||||
maxbond = 0;
|
||||
maxangle = 0;
|
||||
@ -240,22 +243,14 @@ void NeighBondKokkos<DeviceType>::bond_all()
|
||||
do {
|
||||
nmissing = 0;
|
||||
|
||||
h_nlist() = 0;
|
||||
k_nlist.template modify<LMPHostType>();
|
||||
k_nlist.template sync<DeviceType>();
|
||||
|
||||
h_fail_flag() = 0;
|
||||
k_fail_flag.template modify<LMPHostType>();
|
||||
k_fail_flag.template sync<DeviceType>();
|
||||
Kokkos::deep_copy(d_scalars,0);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondBondAll>(0,nlocal),*this,nmissing);
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
Kokkos::deep_copy(h_scalars,d_scalars);
|
||||
|
||||
neighbor->nbondlist = h_nlist();
|
||||
|
||||
k_fail_flag.template modify<DeviceType>();
|
||||
k_fail_flag.template sync<LMPHostType>();
|
||||
if (h_fail_flag()) {
|
||||
maxbond = neighbor->nbondlist + BONDDELTA;
|
||||
memoryKK->grow_kokkos(k_bondlist,neighbor->bondlist,maxbond,3,"neighbor:neighbor->bondlist");
|
||||
@ -327,22 +322,14 @@ void NeighBondKokkos<DeviceType>::bond_partial()
|
||||
do {
|
||||
nmissing = 0;
|
||||
|
||||
h_nlist() = 0;
|
||||
k_nlist.template modify<LMPHostType>();
|
||||
k_nlist.template sync<DeviceType>();
|
||||
|
||||
h_fail_flag() = 0;
|
||||
k_fail_flag.template modify<LMPHostType>();
|
||||
k_fail_flag.template sync<DeviceType>();
|
||||
Kokkos::deep_copy(d_scalars,0);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondBondPartial>(0,nlocal),*this,nmissing);
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
Kokkos::deep_copy(h_scalars,d_scalars);
|
||||
|
||||
neighbor->nbondlist = h_nlist();
|
||||
|
||||
k_fail_flag.template modify<DeviceType>();
|
||||
k_fail_flag.template sync<LMPHostType>();
|
||||
if (h_fail_flag()) {
|
||||
maxbond = neighbor->nbondlist + BONDDELTA;
|
||||
memoryKK->grow_kokkos(k_bondlist,neighbor->bondlist,maxbond,3,"neighbor:neighbor->bondlist");
|
||||
@ -440,22 +427,14 @@ void NeighBondKokkos<DeviceType>::angle_all()
|
||||
do {
|
||||
nmissing = 0;
|
||||
|
||||
h_nlist() = 0;
|
||||
k_nlist.template modify<LMPHostType>();
|
||||
k_nlist.template sync<DeviceType>();
|
||||
|
||||
h_fail_flag() = 0;
|
||||
k_fail_flag.template modify<LMPHostType>();
|
||||
k_fail_flag.template sync<DeviceType>();
|
||||
Kokkos::deep_copy(d_scalars,0);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondAngleAll>(0,nlocal),*this,nmissing);
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
Kokkos::deep_copy(h_scalars,d_scalars);
|
||||
|
||||
neighbor->nanglelist = h_nlist();
|
||||
|
||||
k_fail_flag.template modify<DeviceType>();
|
||||
k_fail_flag.template sync<LMPHostType>();
|
||||
if (h_fail_flag()) {
|
||||
maxangle = neighbor->nanglelist + BONDDELTA;
|
||||
memoryKK->grow_kokkos(k_anglelist,neighbor->anglelist,maxangle,4,"neighbor:neighbor->anglelist");
|
||||
@ -534,22 +513,14 @@ void NeighBondKokkos<DeviceType>::angle_partial()
|
||||
do {
|
||||
nmissing = 0;
|
||||
|
||||
h_nlist() = 0;
|
||||
k_nlist.template modify<LMPHostType>();
|
||||
k_nlist.template sync<DeviceType>();
|
||||
|
||||
h_fail_flag() = 0;
|
||||
k_fail_flag.template modify<LMPHostType>();
|
||||
k_fail_flag.template sync<DeviceType>();
|
||||
Kokkos::deep_copy(d_scalars,0);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondAnglePartial>(0,nlocal),*this,nmissing);
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
Kokkos::deep_copy(h_scalars,d_scalars);
|
||||
|
||||
neighbor->nanglelist = h_nlist();
|
||||
|
||||
k_fail_flag.template modify<DeviceType>();
|
||||
k_fail_flag.template sync<LMPHostType>();
|
||||
if (h_fail_flag()) {
|
||||
maxangle = neighbor->nanglelist + BONDDELTA;
|
||||
memoryKK->grow_kokkos(k_anglelist,neighbor->anglelist,maxangle,4,"neighbor:neighbor->anglelist");
|
||||
@ -667,22 +638,14 @@ void NeighBondKokkos<DeviceType>::dihedral_all()
|
||||
do {
|
||||
nmissing = 0;
|
||||
|
||||
h_nlist() = 0;
|
||||
k_nlist.template modify<LMPHostType>();
|
||||
k_nlist.template sync<DeviceType>();
|
||||
|
||||
h_fail_flag() = 0;
|
||||
k_fail_flag.template modify<LMPHostType>();
|
||||
k_fail_flag.template sync<DeviceType>();
|
||||
Kokkos::deep_copy(d_scalars,0);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondDihedralAll>(0,nlocal),*this,nmissing);
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
Kokkos::deep_copy(h_scalars,d_scalars);
|
||||
|
||||
neighbor->ndihedrallist = h_nlist();
|
||||
|
||||
k_fail_flag.template modify<DeviceType>();
|
||||
k_fail_flag.template sync<LMPHostType>();
|
||||
if (h_fail_flag()) {
|
||||
maxdihedral = neighbor->ndihedrallist + BONDDELTA;
|
||||
memoryKK->grow_kokkos(k_dihedrallist,neighbor->dihedrallist,maxdihedral,5,"neighbor:neighbor->dihedrallist");
|
||||
@ -766,22 +729,14 @@ void NeighBondKokkos<DeviceType>::dihedral_partial()
|
||||
do {
|
||||
nmissing = 0;
|
||||
|
||||
h_nlist() = 0;
|
||||
k_nlist.template modify<LMPHostType>();
|
||||
k_nlist.template sync<DeviceType>();
|
||||
|
||||
h_fail_flag() = 0;
|
||||
k_fail_flag.template modify<LMPHostType>();
|
||||
k_fail_flag.template sync<DeviceType>();
|
||||
Kokkos::deep_copy(d_scalars,0);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondDihedralPartial>(0,nlocal),*this,nmissing);
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
Kokkos::deep_copy(h_scalars,d_scalars);
|
||||
|
||||
neighbor->ndihedrallist = h_nlist();
|
||||
|
||||
k_fail_flag.template modify<DeviceType>();
|
||||
k_fail_flag.template sync<LMPHostType>();
|
||||
if (h_fail_flag()) {
|
||||
maxdihedral = neighbor->ndihedrallist + BONDDELTA;
|
||||
memoryKK->grow_kokkos(k_dihedrallist,neighbor->dihedrallist,maxdihedral,5,"neighbor:neighbor->dihedrallist");
|
||||
@ -921,22 +876,14 @@ void NeighBondKokkos<DeviceType>::improper_all()
|
||||
do {
|
||||
nmissing = 0;
|
||||
|
||||
h_nlist() = 0;
|
||||
k_nlist.template modify<LMPHostType>();
|
||||
k_nlist.template sync<DeviceType>();
|
||||
|
||||
h_fail_flag() = 0;
|
||||
k_fail_flag.template modify<LMPHostType>();
|
||||
k_fail_flag.template sync<DeviceType>();
|
||||
Kokkos::deep_copy(d_scalars,0);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondImproperAll>(0,nlocal),*this,nmissing);
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
Kokkos::deep_copy(h_scalars,d_scalars);
|
||||
|
||||
neighbor->nimproperlist = h_nlist();
|
||||
|
||||
k_fail_flag.template modify<DeviceType>();
|
||||
k_fail_flag.template sync<LMPHostType>();
|
||||
if (h_fail_flag()) {
|
||||
maximproper = neighbor->nimproperlist + BONDDELTA;
|
||||
memoryKK->grow_kokkos(k_improperlist,neighbor->improperlist,maximproper,5,"neighbor:neighbor->improperlist");
|
||||
@ -1020,22 +967,14 @@ void NeighBondKokkos<DeviceType>::improper_partial()
|
||||
do {
|
||||
nmissing = 0;
|
||||
|
||||
h_nlist() = 0;
|
||||
k_nlist.template modify<LMPHostType>();
|
||||
k_nlist.template sync<DeviceType>();
|
||||
|
||||
h_fail_flag() = 0;
|
||||
k_fail_flag.template modify<LMPHostType>();
|
||||
k_fail_flag.template sync<DeviceType>();
|
||||
Kokkos::deep_copy(d_scalars,0);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagNeighBondImproperPartial>(0,nlocal),*this,nmissing);
|
||||
|
||||
k_nlist.template modify<DeviceType>();
|
||||
k_nlist.template sync<LMPHostType>();
|
||||
Kokkos::deep_copy(h_scalars,d_scalars);
|
||||
|
||||
neighbor->nimproperlist = h_nlist();
|
||||
|
||||
k_fail_flag.template modify<DeviceType>();
|
||||
k_fail_flag.template sync<LMPHostType>();
|
||||
if (h_fail_flag()) {
|
||||
maximproper = neighbor->nimproperlist + BONDDELTA;
|
||||
memoryKK->grow_kokkos(k_improperlist,neighbor->improperlist,maximproper,5,"neighbor:neighbor->improperlist");
|
||||
@ -1221,6 +1160,7 @@ void NeighBondKokkos<DeviceType>::update_class_variables()
|
||||
k_map_array.template sync<DeviceType>();
|
||||
} else if (map_style == Atom::MAP_HASH) {
|
||||
k_map_hash = atomKK->k_map_hash;
|
||||
k_map_hash.template sync<DeviceType>();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -115,11 +115,10 @@ class NeighBondKokkos : protected Pointers {
|
||||
typename AT::t_tagint_2d improper_atom1,improper_atom2,
|
||||
improper_atom3,improper_atom4;
|
||||
|
||||
DAT::tdual_int_scalar k_nlist;
|
||||
typename AT::t_int_1d d_scalars;
|
||||
HAT::t_int_1d h_scalars;
|
||||
typename AT::t_int_scalar d_nlist;
|
||||
HAT::t_int_scalar h_nlist;
|
||||
|
||||
DAT::tdual_int_scalar k_fail_flag;
|
||||
typename AT::t_int_scalar d_fail_flag;
|
||||
HAT::t_int_scalar h_fail_flag;
|
||||
|
||||
|
||||
@ -137,6 +137,12 @@ struct PairComputeFunctor {
|
||||
F_FLOAT fytmp = 0.0;
|
||||
F_FLOAT fztmp = 0.0;
|
||||
|
||||
if (NEIGHFLAG == FULL) {
|
||||
f(i,0) = 0.0;
|
||||
f(i,1) = 0.0;
|
||||
f(i,2) = 0.0;
|
||||
}
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = neighbors_i(jj);
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
||||
@ -205,6 +211,12 @@ struct PairComputeFunctor {
|
||||
F_FLOAT fytmp = 0.0;
|
||||
F_FLOAT fztmp = 0.0;
|
||||
|
||||
if (NEIGHFLAG == FULL) {
|
||||
f(i,0) = 0.0;
|
||||
f(i,1) = 0.0;
|
||||
f(i,2) = 0.0;
|
||||
}
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = neighbors_i(jj);
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
||||
@ -767,7 +779,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename std::enable_if<(NEIG
|
||||
fpair->lmp->kokkos->neigh_thread = 1;
|
||||
|
||||
if (fpair->lmp->kokkos->neigh_thread) {
|
||||
fpair->fuse_force_clear_flag = 1;
|
||||
|
||||
int vector_length = 8;
|
||||
int atoms_per_team = 32;
|
||||
@ -805,6 +816,7 @@ template<class PairStyle, class Specialisation>
|
||||
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
|
||||
EV_FLOAT ev;
|
||||
if (fpair->neighflag == FULL) {
|
||||
fpair->fuse_force_clear_flag = 1;
|
||||
ev = pair_compute_neighlist<PairStyle,FULL,Specialisation> (fpair,list);
|
||||
} else if (fpair->neighflag == HALFTHREAD) {
|
||||
ev = pair_compute_neighlist<PairStyle,HALFTHREAD,Specialisation> (fpair,list);
|
||||
@ -860,11 +872,7 @@ void pair_virial_fdotr_compute(PairStyle* fpair) {
|
||||
fpair->virial[5] = virial.v[5];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@ -383,7 +383,7 @@ class Atom : protected Pointers {
|
||||
// map lookup function inlined for efficiency
|
||||
// return -1 if no map defined
|
||||
|
||||
inline int map(tagint global)
|
||||
virtual inline int map(tagint global)
|
||||
{
|
||||
if (map_style == 1)
|
||||
return map_array[global];
|
||||
@ -396,10 +396,10 @@ class Atom : protected Pointers {
|
||||
virtual void map_init(int check = 1);
|
||||
virtual void map_clear();
|
||||
virtual void map_set();
|
||||
void map_one(tagint, int);
|
||||
virtual void map_one(tagint, int);
|
||||
int map_style_set();
|
||||
virtual void map_delete();
|
||||
int map_find_hash(tagint);
|
||||
virtual int map_find_hash(tagint);
|
||||
|
||||
protected:
|
||||
// global to local ID mapping
|
||||
|
||||
@ -215,7 +215,7 @@ int FixLangevin::setmask()
|
||||
if (gjfflag) mask |= INITIAL_INTEGRATE;
|
||||
mask |= POST_FORCE;
|
||||
mask |= POST_FORCE_RESPA;
|
||||
mask |= END_OF_STEP;
|
||||
if (tallyflag || gjfflag) mask |= END_OF_STEP;
|
||||
return mask;
|
||||
}
|
||||
|
||||
@ -915,8 +915,6 @@ void FixLangevin::angmom_thermostat()
|
||||
|
||||
void FixLangevin::end_of_step()
|
||||
{
|
||||
if (!tallyflag && !gjfflag) return;
|
||||
|
||||
double **v = atom->v;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
Reference in New Issue
Block a user