Only use team with full neigh list
@@ -450,10 +450,14 @@ qeq/reax/kk"_fix_qeq_reax.html. If not explicitly set, the value of
 If the {neigh/thread} keyword is set to {off}, then the KOKKOS package
 threads only over atoms. However, for small systems, this may not expose
 enough parallelism to keep a GPU busy. When this keyword is set to {on},
-the KOKKOS package threads over both atoms and neighbors of atoms.
-Using {neigh/thread} {on} may be slower for large systems, so this this
-option is turned on by default only when there are 16K atoms or less
-owned by an MPI rank.
+the KOKKOS package threads over both atoms and neighbors of atoms. When
+using {neigh/thread} {on}, a full neighbor list must also be used. Using
+{neigh/thread} {on} may be slower for large systems, so this option
+is turned on by default only when there are 16K atoms or less owned by
+an MPI rank and when using a full neighbor list. Not all KOKKOS-enabled
+potentials support this keyword yet, and only thread over atoms. Many
+simple pair-wise potentials such as Lennard-Jones do support threading
+over both atoms and neighbors.
 
 The {newton} keyword sets the Newton flags for pairwise and bonded
 interactions to {off} or {on}, the same as the "newton"_newton.html
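The distinction the revised paragraph draws is between flat and hierarchical parallelism. As a rough sketch of the idea only (the view names, signatures, and the placeholder pairwise term are invented here, not LAMMPS source), the two schemes look like this in Kokkos:

#include <Kokkos_Core.hpp>

// neigh/thread off: one thread per atom, serial loop over its neighbors.
void compute_flat(int inum, Kokkos::View<int**> neigh,
                  Kokkos::View<int*> numneigh, Kokkos::View<double*> f) {
  Kokkos::parallel_for("atoms", inum, KOKKOS_LAMBDA(const int i) {
    double fi = 0.0;
    for (int jj = 0; jj < numneigh(i); jj++)
      fi += 1.0 / (1.0 + neigh(i, jj));      // placeholder pairwise term
    f(i) = fi;
  });
}

// neigh/thread on: one team per atom, team members split the neighbor loop.
void compute_team(int inum, Kokkos::View<int**> neigh,
                  Kokkos::View<int*> numneigh, Kokkos::View<double*> f) {
  using member_t = Kokkos::TeamPolicy<>::member_type;
  Kokkos::parallel_for("atoms+neighbors",
      Kokkos::TeamPolicy<>(inum, Kokkos::AUTO),
      KOKKOS_LAMBDA(const member_t& team) {
    const int i = team.league_rank();
    double fi = 0.0;
    Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, numneigh(i)),
        [&](const int jj, double& sum) {
          sum += 1.0 / (1.0 + neigh(i, jj)); // same placeholder term
        }, fi);
    Kokkos::single(Kokkos::PerTeam(team), [&]() { f(i) = fi; });
  });
}

With a full list each team writes only its own atom's force entry, which is presumably why the commit ties {neigh/thread} {on} to full neighbor lists: with a half list, concurrent teams would also have to scatter atomic updates into their neighbors' entries.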
@@ -194,75 +194,75 @@ void CommKokkos::forward_comm_device(int dummy)
         k_firstrecv,k_pbc_flag,k_pbc,k_g2l);
   } else {
 
     for (int iswap = 0; iswap < nswap; iswap++) {
       if (sendproc[iswap] != me) {
         if (comm_x_only) {
           if (size_forward_recv[iswap]) {
             buf = atomKK->k_x.view<DeviceType>().data() +
               firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
             MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
                       recvproc[iswap],0,world,&request);
           }
           n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
                                      iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
           DeviceType::fence();
           if (n) {
             MPI_Send(k_buf_send.view<DeviceType>().data(),
                      n,MPI_DOUBLE,sendproc[iswap],0,world);
           }
 
           if (size_forward_recv[iswap]) {
             MPI_Wait(&request,MPI_STATUS_IGNORE);
             atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
                              space,X_MASK);
+          }
+        } else if (ghost_velocity) {
+          if (size_forward_recv[iswap]) {
+            MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
+                      size_forward_recv[iswap],MPI_DOUBLE,
+                      recvproc[iswap],0,world,&request);
+          }
+          n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
+                                         k_buf_send,pbc_flag[iswap],pbc[iswap]);
+          DeviceType::fence();
+          if (n) {
+            MPI_Send(k_buf_send.view<DeviceType>().data(),n,
+                     MPI_DOUBLE,sendproc[iswap],0,world);
+          }
+          if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
+          avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
+          DeviceType::fence();
+        } else {
+          if (size_forward_recv[iswap])
+            MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
+                      size_forward_recv[iswap],MPI_DOUBLE,
+                      recvproc[iswap],0,world,&request);
+          n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
+                                     k_buf_send,pbc_flag[iswap],pbc[iswap]);
+          DeviceType::fence();
+          if (n)
+            MPI_Send(k_buf_send.view<DeviceType>().data(),n,
+                     MPI_DOUBLE,sendproc[iswap],0,world);
+          if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
+          avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
+          DeviceType::fence();
         }
-        } else if (ghost_velocity) {
-          if (size_forward_recv[iswap]) {
-            MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
-                      size_forward_recv[iswap],MPI_DOUBLE,
-                      recvproc[iswap],0,world,&request);
-          }
-          n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
-                                         k_buf_send,pbc_flag[iswap],pbc[iswap]);
-          DeviceType::fence();
-          if (n) {
-            MPI_Send(k_buf_send.view<DeviceType>().data(),n,
-                     MPI_DOUBLE,sendproc[iswap],0,world);
-          }
-          if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
-          avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
-          DeviceType::fence();
       } else {
-          if (size_forward_recv[iswap])
-            MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
-                      size_forward_recv[iswap],MPI_DOUBLE,
-                      recvproc[iswap],0,world,&request);
-          n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
-                                     k_buf_send,pbc_flag[iswap],pbc[iswap]);
-          DeviceType::fence();
-          if (n)
-            MPI_Send(k_buf_send.view<DeviceType>().data(),n,
-                     MPI_DOUBLE,sendproc[iswap],0,world);
-          if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
-          avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
-          DeviceType::fence();
-        }
-      } else {
-        if (!ghost_velocity) {
-          if (sendnum[iswap])
-            n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
-                                     firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
-          DeviceType::fence();
-        } else {
-          n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
-                                         k_buf_send,pbc_flag[iswap],pbc[iswap]);
-          DeviceType::fence();
-          avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
-          DeviceType::fence();
+        if (!ghost_velocity) {
+          if (sendnum[iswap])
+            n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
+                                     firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
+          DeviceType::fence();
+        } else {
+          n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
+                                         k_buf_send,pbc_flag[iswap],pbc[iswap]);
+          DeviceType::fence();
+          avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
+          DeviceType::fence();
+        }
       }
     }
   }
-  }
 }
 
 /* ----------------------------------------------------------------------
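All three communicating branches above follow the same protocol: post the receive early, pack the send buffer on the device, send, and only unpack after the wait completes. A stripped-down single-swap sketch of that ordering, with toy pack()/unpack() helpers standing in for avec->pack_comm_kokkos()/unpack_comm_kokkos() and every name invented for illustration:

#include <mpi.h>
#include <vector>

// Toy stand-ins for avec->pack_comm_kokkos()/unpack_comm_kokkos(): here
// "packing" is simply a copy of the communicated coordinates.
static int pack(const std::vector<double>& x, std::vector<double>& buf) {
  buf = x;
  return static_cast<int>(buf.size());
}
static void unpack(std::vector<double>& x, const std::vector<double>& buf) {
  x = buf;
}

// One swap of the forward-communication protocol: post the receive first,
// pack and send, and only unpack after MPI_Wait, so that packing (done on
// the device in the real code) overlaps the incoming message.
void forward_swap(int sendproc, int recvproc, int recv_count,
                  const std::vector<double>& x_send,
                  std::vector<double>& x_recv, MPI_Comm world) {
  MPI_Request request;
  std::vector<double> buf_send, buf_recv(recv_count);

  if (recv_count)                        // cf. if (size_forward_recv[iswap])
    MPI_Irecv(buf_recv.data(), recv_count, MPI_DOUBLE,
              recvproc, 0, world, &request);

  int n = pack(x_send, buf_send);        // cf. avec->pack_comm_*_kokkos(...)
  if (n)
    MPI_Send(buf_send.data(), n, MPI_DOUBLE, sendproc, 0, world);

  if (recv_count) MPI_Wait(&request, MPI_STATUS_IGNORE);
  unpack(x_recv, buf_recv);              // cf. avec->unpack_comm_*_kokkos(...)
}

The self-communication branch in the hunk (sendproc[iswap] == me) skips MPI entirely: pack_comm_self writes straight into the receiving positions starting at firstrecv[iswap].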
@@ -89,8 +89,8 @@ U: Must use Kokkos half/thread or full neighbor list with threads or GPUs
 
 Using Kokkos half-neighbor lists with threading is not allowed.
 
-E: Must use KOKKOS package option 'neigh full' with 'neigh_thread on'
+E: Must use KOKKOS package option 'neigh full' with 'neigh/thread on'
 
-The 'neigh_thread on' option requires a full neighbor list
+The 'neigh/thread on' option requires a full neighbor list
 
 */
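A minimal sketch, with invented names, of the consistency check this error text documents; the real check in LAMMPS is issued through its Error class rather than an exception:

#include <stdexcept>

// Reject neigh/thread on whenever the requested neighbor list is not full;
// validate_neigh_thread and its signature are hypothetical, only the error
// message comes from the documentation above.
void validate_neigh_thread(bool neigh_thread, bool full_list) {
  if (neigh_thread && !full_list)
    throw std::runtime_error(
        "Must use KOKKOS package option 'neigh full' with 'neigh/thread on'");
}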
@@ -868,7 +868,7 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
   EV_FLOAT ev;
 
   if (!fpair->lmp->kokkos->neigh_thread_set)
-    if (list->inum <= 16384)
+    if (list->inum <= 16384 && NEIGHFLAG == FULL)
       fpair->lmp->kokkos->neigh_thread = 1;
 
   if (fpair->lmp->kokkos->neigh_thread) {
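This guard is the commit's core change: the automatic default never enables threading over neighbors unless the list is full. Restated as a standalone sketch (the free function and FULL_FLAG constant are invented; only the 16384 threshold and the NEIGHFLAG == FULL test come from the hunk):

// Returns true when the default should switch neigh_thread on; if the user
// set neigh/thread explicitly, their choice is left alone.
constexpr int FULL_FLAG = 1;  // stand-in for the KOKKOS package's FULL value

bool neigh_thread_default(bool user_set, int inum, int neighflag) {
  if (user_set) return false;                     // explicit setting wins
  return inum <= 16384 && neighflag == FULL_FLAG; // small system + full list
}

Since NEIGHFLAG appears to be a compile-time template parameter of pair_compute_neighlist, the added comparison should cost nothing at run time: non-full instantiations simply never take the team-based path.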