Fix issues in Kokkos comm

This commit is contained in:
Stan Moore
2017-10-05 14:58:05 -06:00
parent ca032f21fb
commit 6bf2c60c07
2 changed files with 43 additions and 24 deletions

View File

@ -135,9 +135,10 @@ void CommKokkos::init()
if (force->newton == 0) check_reverse = 0;
if (force->pair) check_reverse += force->pair->comm_reverse_off;
if (check_reverse || check_forward)
forward_comm_classic = true;
//if (check_forward)
// forward_comm_classic = true;
//if (check_reverse || !comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet
if (!comm_f_only) // not all Kokkos atom_vec styles have reverse pack/unpack routines yet
reverse_comm_classic = true;
}
@ -186,12 +187,12 @@ void CommKokkos::forward_comm_device(int dummy)
// if comm_x_only set, exchange or copy directly to x, don't unpack
k_sendlist.sync<DeviceType>();
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
for (int iswap = 0; iswap < nswap; iswap++) {
if (sendproc[iswap] != me) {
if (comm_x_only) {
if (size_forward_recv[iswap]) {
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
buf = atomKK->k_x.view<DeviceType>().ptr_on_device() +
firstrecv[iswap]*atomKK->k_x.view<DeviceType>().dimension_1();
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
@ -204,9 +205,11 @@ void CommKokkos::forward_comm_device(int dummy)
n,MPI_DOUBLE,sendproc[iswap],0,world);
}
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
space,X_MASK);
if (size_forward_recv[iswap]) {
MPI_Wait(&request,MPI_STATUS_IGNORE);
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
space,X_MASK);
}
} else if (ghost_velocity) {
error->all(FLERR,"Ghost velocity forward comm not yet "
"implemented with Kokkos");
@ -276,7 +279,7 @@ void CommKokkos::reverse_comm()
else
atomKK->modified(Host,ALL_MASK);
atomKK->sync(Device,ALL_MASK); // is this needed?
//atomKK->sync(Device,ALL_MASK); // is this needed?
}
template<class DeviceType>
@ -290,9 +293,10 @@ void CommKokkos::reverse_comm_device()
// exchange data with another proc
// if other proc is self, just copy
// if comm_f_only set, exchange or copy directly from f, don't pack
k_sendlist.sync<DeviceType>();
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,F_MASK);
for (int iswap = nswap-1; iswap >= 0; iswap--) {
if (sendproc[iswap] != me) {
if (comm_f_only) {
@ -300,16 +304,17 @@ void CommKokkos::reverse_comm_device()
MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),size_reverse_recv[iswap],MPI_DOUBLE,
sendproc[iswap],0,world,&request);
if (size_reverse_send[iswap]) {
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,F_MASK);
buf = atomKK->k_f.view<DeviceType>().ptr_on_device() +
firstrecv[iswap]*atomKK->k_f.view<DeviceType>().dimension_1();
MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
recvproc[iswap],0,world);
}
if (size_reverse_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
space,F_MASK);
if (size_reverse_recv[iswap]) {
MPI_Wait(&request,MPI_STATUS_IGNORE);
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
space,F_MASK);
}
} else {
if (size_reverse_recv[iswap])
MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
@ -710,9 +715,7 @@ void CommKokkos::borders()
}
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
k_sendlist.sync<LMPHostType>();
k_sendlist.modify<LMPHostType>();
CommBrick::borders();
k_sendlist.modify<LMPHostType>();
atomKK->modified(Host,ALL_MASK);
@ -783,7 +786,7 @@ void CommKokkos::borders_device() {
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
k_sendlist.modify<DeviceType>();
k_sendlist.sync<DeviceType>();
atomKK->sync(exec_space,ALL_MASK);
// do swaps over all 3 dimensions
@ -845,20 +848,24 @@ void CommKokkos::borders_device() {
k_total_send.template modify<DeviceType>();
k_total_send.template sync<LMPHostType>();
k_sendlist.modify<DeviceType>();
if(k_total_send.h_view() >= maxsendlist[iswap]) {
grow_list(iswap,k_total_send.h_view());
k_sendlist.modify<DeviceType>();
k_total_send.h_view() = 0;
if(exec_space == Device) {
k_total_send.template modify<LMPHostType>();
k_total_send.template sync<LMPDeviceType>();
}
k_total_send.template modify<LMPHostType>();
k_total_send.template sync<LMPDeviceType>();
BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
k_total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
Kokkos::TeamPolicy<DeviceType> config((nlast-nfirst+127)/128,128);
Kokkos::parallel_for(config,f);
k_total_send.template modify<DeviceType>();
k_total_send.template sync<LMPHostType>();
k_sendlist.modify<DeviceType>();
}
nsend = k_total_send.h_view();
} else {
@ -983,7 +990,6 @@ void CommKokkos::borders_device() {
// reset global->local map
if (exec_space == Host) k_sendlist.sync<LMPDeviceType>();
atomKK->modified(exec_space,ALL_MASK);
if (map_style) {
atomKK->sync(Host,TAG_MASK);
@ -1057,6 +1063,11 @@ void CommKokkos::grow_list(int iswap, int n)
{
int size = static_cast<int> (BUFFACTOR * n);
if (exchange_comm_classic) { // force realloc on Host
k_sendlist.sync<LMPHostType>();
k_sendlist.modify<LMPHostType>();
}
memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
for(int i=0;i<maxswap;i++) {
@ -1080,6 +1091,11 @@ void CommKokkos::grow_swap(int n)
maxswap = n;
int size = MAX(k_sendlist.d_view.dimension_1(),BUFMIN);
if (exchange_comm_classic) { // force realloc on Host
k_sendlist.sync<LMPHostType>();
k_sendlist.modify<LMPHostType>();
}
memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
memory->grow(maxsendlist,n,"comm:maxsendlist");

View File

@ -526,8 +526,11 @@ void VerletKokkos::run(int n)
// reverse communication of forces
if (force->newton) comm->reverse_comm();
timer->stamp(Timer::COMM);
if (force->newton) {
Kokkos::fence();
comm->reverse_comm();
timer->stamp(Timer::COMM);
}
// force modifications, final time integration, diagnostics