Only use team with full neigh list
@@ -450,10 +450,14 @@ qeq/reax/kk"_fix_qeq_reax.html. If not explicitly set, the value of
 If the {neigh/thread} keyword is set to {off}, then the KOKKOS package
 threads only over atoms. However, for small systems, this may not expose
 enough parallelism to keep a GPU busy. When this keyword is set to {on},
-the KOKKOS package threads over both atoms and neighbors of atoms.
-Using {neigh/thread} {on} may be slower for large systems, so this
-option is turned on by default only when there are 16K atoms or less
-owned by an MPI rank.
+the KOKKOS package threads over both atoms and neighbors of atoms. When
+using {neigh/thread} {on}, a full neighbor list must also be used. Using
+{neigh/thread} {on} may be slower for large systems, so this option is
+turned on by default only when there are 16K atoms or less owned by an
+MPI rank and when using a full neighbor list. Not all KOKKOS-enabled
+potentials support this keyword yet; those that do not thread only over
+atoms. Many simple pair-wise potentials such as Lennard-Jones do support
+threading over both atoms and neighbors.

 The {newton} keyword sets the Newton flags for pairwise and bonded
 interactions to {off} or {on}, the same as the "newton"_newton.html
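In input-script terms, the doc paragraph above maps onto the "package"_package.html command. A minimal sketch, assuming a KOKKOS-enabled build run with the kk suffix (e.g. lmp -k on g 1 -sf kk):

  package kokkos neigh full neigh/thread on    # team-thread over atoms and their neighbors
  package kokkos neigh full neigh/thread off   # thread over atoms only

The same keywords can also be passed on the command line through the -pk kokkos switch.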
@@ -194,75 +194,75 @@ void CommKokkos::forward_comm_device(int dummy)
                                 k_firstrecv,k_pbc_flag,k_pbc,k_g2l);
  } else {
    for (int iswap = 0; iswap < nswap; iswap++) {
      if (sendproc[iswap] != me) {
        if (comm_x_only) {
          // positions only: post the receive directly into the x view and
          // overlap it with packing the send buffer on the device
          if (size_forward_recv[iswap]) {
            buf = atomKK->k_x.view<DeviceType>().data() +
              firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
            MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
                      recvproc[iswap],0,world,&request);
          }
          n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
                                     iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
          DeviceType::fence();
          if (n) {
            MPI_Send(k_buf_send.view<DeviceType>().data(),
                     n,MPI_DOUBLE,sendproc[iswap],0,world);
          }
          if (size_forward_recv[iswap]) {
            MPI_Wait(&request,MPI_STATUS_IGNORE);
            atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
                             space,X_MASK);
          }
        } else if (ghost_velocity) {
          // positions plus velocities: go through pack/unpack buffers
          if (size_forward_recv[iswap]) {
            MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
                      size_forward_recv[iswap],MPI_DOUBLE,
                      recvproc[iswap],0,world,&request);
          }
          n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
                                         k_buf_send,pbc_flag[iswap],pbc[iswap]);
          DeviceType::fence();
          if (n) {
            MPI_Send(k_buf_send.view<DeviceType>().data(),n,
                     MPI_DOUBLE,sendproc[iswap],0,world);
          }
          if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
          avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
          DeviceType::fence();
        } else {
          // general case: exchange through send/recv buffers
          if (size_forward_recv[iswap])
            MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
                      size_forward_recv[iswap],MPI_DOUBLE,
                      recvproc[iswap],0,world,&request);
          n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
                                     k_buf_send,pbc_flag[iswap],pbc[iswap]);
          DeviceType::fence();
          if (n)
            MPI_Send(k_buf_send.view<DeviceType>().data(),n,
                     MPI_DOUBLE,sendproc[iswap],0,world);
          if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
          avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
          DeviceType::fence();
        }
      } else {
        // swap with self: copy on the device, no MPI involved
        if (!ghost_velocity) {
          if (sendnum[iswap])
            n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
                                     firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
          DeviceType::fence();
        } else {
          n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
                                         k_buf_send,pbc_flag[iswap],pbc[iswap]);
          DeviceType::fence();
          avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
          DeviceType::fence();
        }
      }
    }
  }
}

/* ----------------------------------------------------------------------

@@ -89,8 +89,8 @@ U: Must use Kokkos half/thread or full neighbor list with threads or GPUs

 Using Kokkos half-neighbor lists with threading is not allowed.

-E: Must use KOKKOS package option 'neigh full' with 'neigh_thread on'
+E: Must use KOKKOS package option 'neigh full' with 'neigh/thread on'

-The 'neigh_thread on' option requires a full neighbor list
+The 'neigh/thread on' option requires a full neighbor list

 */
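To illustrate the check behind this message, a hedged input sketch (assuming a KOKKOS build): the first line below would stop with the error above, the second is the accepted combination.

  package kokkos neigh half neigh/thread on   # rejected: half list with neigh/thread on
  package kokkos neigh full neigh/thread on   # accepted: full neighbor list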
@@ -868,7 +868,7 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
   EV_FLOAT ev;

   if (!fpair->lmp->kokkos->neigh_thread_set)
-    if (list->inum <= 16384)
+    if (list->inum <= 16384 && NEIGHFLAG == FULL)
       fpair->lmp->kokkos->neigh_thread = 1;

   if (fpair->lmp->kokkos->neigh_thread) {
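The guard now enables the neigh_thread default only when NEIGHFLAG == FULL, matching the new requirement. Below is a minimal, hypothetical Kokkos sketch (not the LAMMPS kernel) of the hierarchical "team" parallelism this turns on: one team per owned atom, with the team's threads reducing over that atom's neighbors. The names d_neighbors, d_numneigh, f, and the pair term are illustrative stand-ins, not LAMMPS members. With a full list every pair is visited from both of its atoms, so each team writes only its own atom's total and no atomic update on the partner atom is needed; with a half list each pair would also have to scatter into the partner, which does not fit this write pattern.

  // Hypothetical sketch, not LAMMPS code. Assumes Kokkos has been initialized.
  #include <Kokkos_Core.hpp>

  using team_t = Kokkos::TeamPolicy<>::member_type;

  void force_sketch(Kokkos::View<int**> d_neighbors,   // neighbor ids, [inum][maxneigh]
                    Kokkos::View<int*> d_numneigh,     // neighbor count per atom
                    Kokkos::View<double*> f,           // toy per-atom accumulator
                    int inum) {
    Kokkos::parallel_for("pair/team",
      Kokkos::TeamPolicy<>(inum, Kokkos::AUTO),        // one team per owned atom
      KOKKOS_LAMBDA(const team_t &team) {
        const int i = team.league_rank();
        double fsum = 0.0;
        // the team's threads split atom i's neighbor list among themselves
        Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_numneigh(i)),
          [&](const int jj, double &acc) {
            const int j = d_neighbors(i, jj);
            acc += 1.0/(1.0 + j);                      // placeholder pair term
          }, fsum);
        // full list: the j->i contribution is computed by atom j's own team,
        // so only f(i) is written here and no atomics are required
        Kokkos::single(Kokkos::PerTeam(team), [&]() { f(i) = fsum; });
      });
  }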