Update docs

Stan Moore
2023-12-19 10:46:41 -07:00
parent ab29200c60
commit 86f87e0f7b
2 changed files with 24 additions and 23 deletions


@@ -474,13 +474,13 @@ If the *neigh/thread* keyword is set to *off*, then the KOKKOS package
 threads only over atoms. However, for small systems, this may not expose
 enough parallelism to keep a GPU busy. When this keyword is set to *on*,
 the KOKKOS package threads over both atoms and neighbors of atoms. When
-using *neigh/thread* *on*, a full neighbor list must also be used. Using
-*neigh/thread* *on* may be slower for large systems, so this option
-is turned on by default only when there are 16K atoms or less owned by
-an MPI rank and when using a full neighbor list. Not all KOKKOS-enabled
-potentials support this keyword yet, and only thread over atoms. Many
-simple pairwise potentials such as Lennard-Jones do support threading
-over both atoms and neighbors.
+using *neigh/thread* *on*, the :doc:`newton pair <newton>` setting must
+be "off". Using *neigh/thread* *on* may be slower for large systems, so
+this option is turned on by default only when running on one or more
+GPUs and there are 16k atoms or less owned by an MPI rank. Not all
+KOKKOS-enabled potentials support this keyword yet, and only thread over
+atoms. Many simple pairwise potentials such as Lennard-Jones do support
+threading over both atoms and neighbors.

 If the *neigh/transpose* keyword is set to *off*, then the KOKKOS
 package will use the same memory layout for building the neighbor list on
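A standalone sketch of the neigh/thread default this hunk documents, with the GPU check, the 16000-atom threshold, and the newton-pair condition taken from the source change later in this commit (the helper name and signature are illustrative only, not LAMMPS API):

// Illustrative restatement of the condition added in pair_compute_neighlist().
bool neigh_thread_default_on(int ngpus, int inum, bool newton_pair_off) {
  return ngpus > 0          // only when running on one or more GPUs
      && inum <= 16000      // 16k atoms or less owned by this MPI rank
      && newton_pair_off;   // the doc text above requires newton pair "off"
}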
@@ -732,7 +732,7 @@ comm = device, sort = device, neigh/transpose = off, gpu/aware = on. When
 LAMMPS can safely detect that GPU-aware MPI is not available, the default value
 of gpu/aware becomes "off". For CPUs or Xeon Phis, the option defaults are
 neigh = half, neigh/qeq = half, newton = on, binsize = 0.0, comm = no, and sort
-= no. The option neigh/thread = on when there are 16K atoms or less on an MPI
+= no. For GPUs, the option neigh/thread = on when there are 16k atoms or less on an MPI
 rank, otherwise it is "off". These settings are made automatically by the
 required "-k on" :doc:`command-line switch <Run_options>`. You can change them
 by using the package kokkos command in your input script or via the :doc:`-pk


@@ -935,8 +935,10 @@ template<class PairStyle, unsigned NEIGHFLAG, int ZEROFLAG = 0, class Specialisation = void>
 EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*> list) {
   EV_FLOAT ev;
+  const int inum = list->inum;
   if (!fpair->lmp->kokkos->neigh_thread_set)
-    if (list->inum <= 16384)
+    if (fpair->lmp->kokkos->ngpus && inum <= 16000)
       if (NEIGHFLAG == FULL || !fpair->newton_pair)
         fpair->lmp->kokkos->neigh_thread = 1;
@@ -947,26 +949,26 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*> list) {
   static int lastcall = -1;
 #if defined(LMP_KOKKOS_GPU)
-#if defined(KOKKOS_ENABLE_HIP)
-  int max_vectorsize = 64;
-#else
-  int max_vectorsize = 32;
-#endif
   if (!vectorsize || lastcall < fpair->lmp->neighbor->lastcall) {
     lastcall = fpair->lmp->update->ntimestep;
     vectorsize = GetMaxNeighs(list);
     vectorsize = MathSpecial::powint(2,(int(log2(vectorsize) + 0.5))); // round to nearest power of 2
+#if defined(KOKKOS_ENABLE_HIP)
+    int max_vectorsize = 64;
+#else
+    int max_vectorsize = 32;
+#endif
     vectorsize = MIN(vectorsize,max_vectorsize);
     int teamsize_max_for,teamsize_max_reduce;
     if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
       PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
-      GetMaxTeamSize<typename PairStyle::device_type>(ff, list->inum, teamsize_max_for, teamsize_max_reduce);
+      GetMaxTeamSize<typename PairStyle::device_type>(ff, inum, teamsize_max_for, teamsize_max_reduce);
     } else {
       PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
-      GetMaxTeamSize<typename PairStyle::device_type>(ff, list->inum, teamsize_max_for, teamsize_max_reduce);
+      GetMaxTeamSize<typename PairStyle::device_type>(ff, inum, teamsize_max_for, teamsize_max_reduce);
     }
     int teamsize_max = teamsize_max_for;
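The rounding line in this hunk picks the power of two nearest (in log2 space) to the largest neighbor count, then clamps it to the warp/wavefront width. A self-contained sketch of that arithmetic, with a bit shift standing in for the MathSpecial::powint construction (the helper name is illustrative):

#include <cmath>
#include <cstdio>
#include <algorithm>

// Round max_neighs to the nearest power of 2, then clamp to the hardware
// vector width: 32 lanes for CUDA warps, 64 for HIP wavefronts.
int pick_vectorsize(int max_neighs, int max_vectorsize) {
  int v = 1 << static_cast<int>(std::log2(static_cast<double>(max_neighs)) + 0.5);
  return std::min(v, max_vectorsize);
}

int main() {
  printf("%d\n", pick_vectorsize(37, 32)); // log2(37) ~ 5.21 -> 2^5 = 32
  printf("%d\n", pick_vectorsize(12, 32)); // log2(12) ~ 3.58 -> 2^4 = 16
  printf("%d\n", pick_vectorsize(90, 32)); // log2(90) ~ 6.49 -> 2^6 = 64, clamped to 32
}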
@@ -979,7 +981,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*> list) {
     atoms_per_team = 1;
 #endif
-  const int inum = list->inum;
   const int num_teams = inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0);
   if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
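The num_teams expression is the usual integer ceiling division, now reusing the inum hoisted to the top of the function. A hypothetical free-function restatement:

// Illustrative: teams needed to cover inum atoms at atoms_per_team each.
int num_teams(int inum, int atoms_per_team) {
  return inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0);
}
// num_teams(100, 8) == 13: twelve full teams plus one for the 4 leftover atoms.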
@@ -996,13 +997,13 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*> list) {
   } else {
     if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
       PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
-      else Kokkos::parallel_for(list->inum,ff);
+      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(inum,ff,ev);
+      else Kokkos::parallel_for(inum,ff);
       ff.contribute();
     } else {
       PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
-      else Kokkos::parallel_for(list->inum,ff);
+      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(inum,ff,ev);
+      else Kokkos::parallel_for(inum,ff);
       ff.contribute();
     }
   }
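The substitution in this last hunk is cosmetic; the dispatch pattern itself is standard Kokkos: run the same index range with parallel_reduce when an energy/virial tally is requested, and parallel_for otherwise. A minimal self-contained sketch of that pattern, using a plain double in place of the EV_FLOAT reduction type (standalone demo, not LAMMPS code):

#include <Kokkos_Core.hpp>
#include <cstdio>

int main(int argc, char** argv) {
  Kokkos::initialize(argc, argv);
  {
    const int inum = 1000;   // stands in for list->inum
    const bool eflag = true; // stands in for fpair->eflag || fpair->vflag
    if (eflag) {
      // Reduction path: each index contributes to an accumulated scalar,
      // analogous to the energy/virial tally above.
      double ev = 0.0;
      Kokkos::parallel_reduce(inum, KOKKOS_LAMBDA(const int i, double& sum) {
        sum += 1.0;          // per-atom energy contribution would go here
      }, ev);
      printf("ev = %g\n", ev);
    } else {
      // Force-only path: no accumulation needed.
      Kokkos::parallel_for(inum, KOKKOS_LAMBDA(const int i) {
        (void)i;             // per-atom force update would go here
      });
    }
  }
  Kokkos::finalize();
  return 0;
}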