Only use team with full neigh list

Stan Moore
2019-04-09 09:17:07 -06:00
parent 16b17f812c
commit 82be3ee32c
4 changed files with 75 additions and 71 deletions


@@ -450,10 +450,14 @@ qeq/reax/kk"_fix_qeq_reax.html. If not explicitly set, the value of
 If the {neigh/thread} keyword is set to {off}, then the KOKKOS package
 threads only over atoms. However, for small systems, this may not expose
 enough parallelism to keep a GPU busy. When this keyword is set to {on},
-the KOKKOS package threads over both atoms and neighbors of atoms.
-Using {neigh/thread} {on} may be slower for large systems, so this
-option is turned on by default only when there are 16K atoms or less
-owned by an MPI rank.
+the KOKKOS package threads over both atoms and neighbors of atoms. When
+using {neigh/thread} {on}, a full neighbor list must also be used. Using
+{neigh/thread} {on} may be slower for large systems, so this option is
+turned on by default only when there are 16K atoms or less owned by
+an MPI rank and when using a full neighbor list. Not all KOKKOS-enabled
+potentials support this keyword yet; those that do not thread only over
+atoms. Many simple pair-wise potentials such as Lennard-Jones do support
+threading over both atoms and neighbors.
 
 The {newton} keyword sets the Newton flags for pairwise and bonded
 interactions to {off} or {on}, the same as the "newton"_newton.html
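The doc text above is the reason a full neighbor list is required: when threading over both atoms and neighbors, each team should accumulate force only onto its own atom i, so no thread ever writes to atom j. Below is a minimal sketch of that pattern using Kokkos hierarchical parallelism. It is not the LAMMPS kernel; the views d_numneigh/d_neighbors and f, and the helper pair_force_x, are hypothetical stand-ins.

#include <Kokkos_Core.hpp>

// Placeholder for the real pairwise interaction (e.g. Lennard-Jones).
KOKKOS_INLINE_FUNCTION
double pair_force_x(int /*i*/, int /*j*/) { return 0.0; }

// Sketch only: one team per atom i; the team's threads reduce over the
// neighbors of i taken from a FULL neighbor list, so the result is
// written to f(i,*) alone and no atomics on atom j are needed.
void force_over_atoms_and_neighbors(int inum,
                                    Kokkos::View<int*>  d_numneigh,   // hypothetical: # neighbors of i
                                    Kokkos::View<int**> d_neighbors,  // hypothetical: neighbor indices of i
                                    Kokkos::View<double*[3]> f)       // hypothetical: per-atom force
{
  using policy_t = Kokkos::TeamPolicy<>;
  using member_t = policy_t::member_type;

  Kokkos::parallel_for("pair/sketch", policy_t(inum, Kokkos::AUTO),
    KOKKOS_LAMBDA(const member_t& team) {
      const int i = team.league_rank();
      double fx = 0.0;  // x-force on atom i, accumulated by the whole team
      Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_numneigh(i)),
        [&](const int jj, double& sum) {
          const int j = d_neighbors(i, jj);
          sum += pair_force_x(i, j);
        }, fx);
      Kokkos::single(Kokkos::PerTeam(team), [&]() { f(i, 0) += fx; });
    });
}

With a half neighbor list the same pattern would also have to update f(j,*), which requires atomic operations across teams; that constraint is what the new doc sentence and the error-message change further down refer to.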


@@ -194,75 +194,75 @@ void CommKokkos::forward_comm_device(int dummy)
                                 k_firstrecv,k_pbc_flag,k_pbc,k_g2l);
  } else {
    for (int iswap = 0; iswap < nswap; iswap++) {
      if (sendproc[iswap] != me) {
        if (comm_x_only) {
          if (size_forward_recv[iswap]) {
            buf = atomKK->k_x.view<DeviceType>().data() +
              firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
            MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
                      recvproc[iswap],0,world,&request);
          }
          n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
                                     iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
          DeviceType::fence();
          if (n) {
            MPI_Send(k_buf_send.view<DeviceType>().data(),
                     n,MPI_DOUBLE,sendproc[iswap],0,world);
          }
          if (size_forward_recv[iswap]) {
            MPI_Wait(&request,MPI_STATUS_IGNORE);
            atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
                             space,X_MASK);
          }
        } else if (ghost_velocity) {
          if (size_forward_recv[iswap]) {
            MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
                      size_forward_recv[iswap],MPI_DOUBLE,
                      recvproc[iswap],0,world,&request);
          }
          n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
                                         k_buf_send,pbc_flag[iswap],pbc[iswap]);
          DeviceType::fence();
          if (n) {
            MPI_Send(k_buf_send.view<DeviceType>().data(),n,
                     MPI_DOUBLE,sendproc[iswap],0,world);
          }
          if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
          avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
          DeviceType::fence();
        } else {
          if (size_forward_recv[iswap])
            MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
                      size_forward_recv[iswap],MPI_DOUBLE,
                      recvproc[iswap],0,world,&request);
          n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
                                     k_buf_send,pbc_flag[iswap],pbc[iswap]);
          DeviceType::fence();
          if (n)
            MPI_Send(k_buf_send.view<DeviceType>().data(),n,
                     MPI_DOUBLE,sendproc[iswap],0,world);
          if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
          avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
          DeviceType::fence();
        }
      } else {
        if (!ghost_velocity) {
          if (sendnum[iswap])
            n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
                                     firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
          DeviceType::fence();
        } else {
          n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
                                         k_buf_send,pbc_flag[iswap],pbc[iswap]);
          DeviceType::fence();
          avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
          DeviceType::fence();
        }
      }
    }
  }
}

/* ----------------------------------------------------------------------


@@ -89,8 +89,8 @@ U: Must use Kokkos half/thread or full neighbor list with threads or GPUs
 
 Using Kokkos half-neighbor lists with threading is not allowed.
 
-E: Must use KOKKOS package option 'neigh full' with 'neigh_thread on'
+E: Must use KOKKOS package option 'neigh full' with 'neigh/thread on'
 
-The 'neigh_thread on' option requires a full neighbor list
+The 'neigh/thread on' option requires a full neighbor list
 
 */


@@ -868,7 +868,7 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
   EV_FLOAT ev;
 
   if (!fpair->lmp->kokkos->neigh_thread_set)
-    if (list->inum <= 16384)
+    if (list->inum <= 16384 && NEIGHFLAG == FULL)
       fpair->lmp->kokkos->neigh_thread = 1;
 
   if (fpair->lmp->kokkos->neigh_thread) {
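The one-line change above only adjusts the default: if the user has not set {neigh/thread} explicitly, threading over both atoms and neighbors is enabled only for small systems (inum <= 16384) that use a full neighbor list. A minimal sketch of that decision follows; the function name, enum, and boolean flags are hypothetical stand-ins for the LAMMPS internals referenced above.

// Sketch of the default heuristic only, not LAMMPS code. The 16384-atom
// threshold and the FULL requirement mirror the condition in the hunk above.
enum NeighFlag { HALF = 0, FULL = 1 };

bool use_neigh_thread(NeighFlag neighflag, int inum,
                      bool neigh_thread_set, bool neigh_thread_user)
{
  // honor an explicit "package kokkos neigh/thread on|off" choice
  if (neigh_thread_set) return neigh_thread_user;
  // default: thread over atoms *and* neighbors only for small systems
  // that provide a full neighbor list
  return inum <= 16384 && neighflag == FULL;
}

In an input script this corresponds to combining the 'neigh full' and 'neigh/thread on' package options, which is exactly the pairing required by the updated error message in the previous file.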