USER-DPD Kokkos: use a parallel_for() to build the ghosts workplan for SSA

This commit is contained in:
Tim Mattox
2017-04-06 03:53:57 -04:00
parent 9e272cb393
commit 178af2ec9e
2 changed files with 23 additions and 49 deletions

View File

@ -333,7 +333,10 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
}
// count how many ghosts might have neighbors, and increase the work plan storage
for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) {
nl_size += k_gbincount.h_view(workPhase + 1);
int len = k_gbincount.h_view(workPhase + 1);
ssa_gitemLoc(workPhase,0) = nl_size; // record where workItem starts in ilist
ssa_gitemLen(workPhase,0) = len;
nl_size += len;
}
list->grow(nl_size); // Make special larger SSA neighbor list
@ -415,22 +418,19 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
Kokkos::deep_copy(data.resize, data.h_resize);
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
#ifdef NOTYET
NPairSSAKokkosBuildFunctor<DeviceType> f(data,atoms_per_bin*5*sizeof(X_FLOAT));
Kokkos::parallel_for(nall, f);
#endif
// loop over bins with local atoms, storing half of the neighbors
#ifdef USE_LAMBDA_BUILD
Kokkos::parallel_for(ssa_phaseCt, LAMMPS_LAMBDA (const int workPhase) {
data.build_locals_onePhase(firstTry, comm->me, workPhase);
});
#else
NPairSSAKokkosBuildFunctor<DeviceType> f(data, firstTry, comm->me);
Kokkos::parallel_for(ssa_phaseCt, f);
#endif
data.neigh_list.inum = ssa_itemLoc(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1) +
ssa_itemLen(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1);
data.build_ghosts();
// loop over AIR ghost atoms, storing their local neighbors
Kokkos::parallel_for(ssa_gphaseCt, LAMMPS_LAMBDA (const int workPhase) {
data.build_ghosts_onePhase(workPhase);
});
data.neigh_list.gnum = ssa_gitemLoc(ssa_gphaseCt-1,ssa_gphaseLen(ssa_gphaseCt-1)-1) +
ssa_gitemLen(ssa_gphaseCt-1,ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum;
firstTry = false;
DeviceType::fence();
@ -606,34 +606,27 @@ fprintf(stdout, "Phas%03d phase %3d used %6d inums, workItems = %3d, skipped = %
template<class DeviceType>
void NPairSSAKokkosExecute<DeviceType>::build_ghosts()
void NPairSSAKokkosExecute<DeviceType>::build_ghosts_onePhase(int workPhase) const
{
int n = 0;
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil = d_stencil;
int which = 0;
int inum = neigh_list.inum;
int gnum = 0;
// loop over AIR ghost atoms, storing their local neighbors
// since these are ghosts, must check if stencil bin is out of bounds
for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) {
int airnum = workPhase + 1;
//FIXME for now, there is only 1 workItem for each ghost AIR
int workItem;
for (workItem = 0; workItem < 1; ++workItem) {
d_ssa_gitemLoc(workPhase, workItem) = inum + gnum; // record where workItem starts in ilist
int gNdx = d_ssa_gitemLoc(workPhase, workItem); // record where workItem starts in ilist
for (int il = 0; il < c_gbincount(airnum); ++il) {
const int i = c_gbins(airnum, il);
n = 0;
int n = 0;
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(inum + gnum);
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(gNdx);
const X_FLOAT xtmp = x(i, 0);
const X_FLOAT ytmp = x(i, 1);
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
int loc[3];
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2), &(loc[0]));
@ -686,8 +679,8 @@ void NPairSSAKokkosExecute<DeviceType>::build_ghosts()
}
if (n > 0) {
neigh_list.d_numneigh(inum + gnum) = n;
neigh_list.d_ilist(inum + (gnum++)) = i;
neigh_list.d_numneigh(gNdx) = n;
neigh_list.d_ilist(gNdx++) = i;
if(n > neigh_list.maxneighs) {
resize() = 1;
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
@ -695,12 +688,10 @@ void NPairSSAKokkosExecute<DeviceType>::build_ghosts()
}
}
// record where workItem ends in ilist
d_ssa_gitemLen(workPhase,workItem) = inum + gnum - d_ssa_gitemLoc(workPhase,workItem);
d_ssa_gitemLen(workPhase,workItem) = gNdx - d_ssa_gitemLoc(workPhase,workItem);
// if (d_ssa_gitemLen(workPhase,workItem) > 0) workItem++;
}
d_ssa_gphaseLen(workPhase) = workItem;
}
neigh_list.gnum = gnum;
}
}

View File

@ -275,7 +275,7 @@ class NPairSSAKokkosExecute
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
resize = typename AT::t_int_scalar("NPairSSAKokkosExecute::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(resize);
#else
@ -283,7 +283,7 @@ class NPairSSAKokkosExecute
#endif
h_resize() = 1;
new_maxneighs = typename AT::
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
t_int_scalar("NPairSSAKokkosExecute::new_maxneighs");
#ifndef KOKKOS_USE_CUDA_UVM
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
#else
@ -297,7 +297,8 @@ class NPairSSAKokkosExecute
KOKKOS_FUNCTION
void build_locals_onePhase(const bool firstTry, int me, int workPhase) const;
void build_ghosts();
KOKKOS_FUNCTION
void build_ghosts_onePhase(int workPhase) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
@ -351,24 +352,6 @@ class NPairSSAKokkosExecute
};
template<class DeviceType>
struct NPairSSAKokkosBuildFunctor {
typedef DeviceType device_type;
const NPairSSAKokkosExecute<DeviceType> c;
const bool firstTry;
const int me;
NPairSSAKokkosBuildFunctor(const NPairSSAKokkosExecute<DeviceType> &_c,
const bool _firstTry, const int _me):c(_c),
firstTry(_firstTry), me(_me) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.build_locals_onePhase(firstTry, me, i);
}
};
}
#endif