Update docs
This commit is contained in:
@ -474,13 +474,13 @@ If the *neigh/thread* keyword is set to *off*, then the KOKKOS package
|
||||
threads only over atoms. However, for small systems, this may not expose
|
||||
enough parallelism to keep a GPU busy. When this keyword is set to *on*,
|
||||
the KOKKOS package threads over both atoms and neighbors of atoms. When
|
||||
using *neigh/thread* *on*, a full neighbor list must also be used. Using
|
||||
*neigh/thread* *on* may be slower for large systems, so this option
|
||||
is turned on by default only when there are 16K atoms or less owned by
|
||||
an MPI rank and when using a full neighbor list. Not all KOKKOS-enabled
|
||||
potentials support this keyword yet, and only thread over atoms. Many
|
||||
simple pairwise potentials such as Lennard-Jones do support threading
|
||||
over both atoms and neighbors.
|
||||
using *neigh/thread* *on*, the :doc:`newton pair <newton>` setting must
|
||||
be "off". Using *neigh/thread* *on* may be slower for large systems, so
|
||||
this option is turned on by default only when running on one or
|
||||
more GPUs and there are 16k atoms or less owned by an MPI rank. Not all
|
||||
KOKKOS-enabled potentials support this keyword yet, and only thread over
|
||||
atoms. Many simple pairwise potentials such as Lennard-Jones do support
|
||||
threading over both atoms and neighbors.
|
||||
|
||||
If the *neigh/transpose* keyword is set to *off*, then the KOKKOS
|
||||
package will use the same memory layout for building the neighbor list on
|
||||
@ -732,7 +732,7 @@ comm = device, sort = device, neigh/transpose = off, gpu/aware = on. When
|
||||
LAMMPS can safely detect that GPU-aware MPI is not available, the default value
|
||||
of gpu/aware becomes "off". For CPUs or Xeon Phis, the option defaults are
|
||||
neigh = half, neigh/qeq = half, newton = on, binsize = 0.0, comm = no, and sort
|
||||
= no. The option neigh/thread = on when there are 16K atoms or less on an MPI
|
||||
= no. For GPUs, the option neigh/thread = on when there are 16k atoms or less on an MPI
|
||||
rank, otherwise it is "off". These settings are made automatically by the
|
||||
required "-k on" :doc:`command-line switch <Run_options>`. You can change them
|
||||
by using the package kokkos command in your input script or via the :doc:`-pk
|
||||
|
||||
@ -935,8 +935,10 @@ template<class PairStyle, unsigned NEIGHFLAG, int ZEROFLAG = 0, class Specialisa
|
||||
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*> list) {
|
||||
EV_FLOAT ev;
|
||||
|
||||
const int inum = list->inum;
|
||||
|
||||
if (!fpair->lmp->kokkos->neigh_thread_set)
|
||||
if (list->inum <= 16384)
|
||||
if (fpair->lmp->kokkos->ngpus && inum <= 16000)
|
||||
if (NEIGHFLAG == FULL || !fpair->newton_pair)
|
||||
fpair->lmp->kokkos->neigh_thread = 1;
|
||||
|
||||
@ -947,26 +949,26 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
|
||||
static int lastcall = -1;
|
||||
|
||||
#if defined(LMP_KOKKOS_GPU)
|
||||
|
||||
#if defined(KOKKOS_ENABLE_HIP)
|
||||
int max_vectorsize = 64;
|
||||
#else
|
||||
int max_vectorsize = 32;
|
||||
#endif
|
||||
|
||||
if (!vectorsize || lastcall < fpair->lmp->neighbor->lastcall) {
|
||||
lastcall = fpair->lmp->update->ntimestep;
|
||||
vectorsize = GetMaxNeighs(list);
|
||||
vectorsize = MathSpecial::powint(2,(int(log2(vectorsize) + 0.5))); // round to nearest power of 2
|
||||
|
||||
#if defined(KOKKOS_ENABLE_HIP)
|
||||
int max_vectorsize = 64;
|
||||
#else
|
||||
int max_vectorsize = 32;
|
||||
#endif
|
||||
|
||||
vectorsize = MIN(vectorsize,max_vectorsize);
|
||||
|
||||
int teamsize_max_for,teamsize_max_reduce;
|
||||
if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
|
||||
GetMaxTeamSize<typename PairStyle::device_type>(ff, list->inum, teamsize_max_for, teamsize_max_reduce);
|
||||
GetMaxTeamSize<typename PairStyle::device_type>(ff, inum, teamsize_max_for, teamsize_max_reduce);
|
||||
} else {
|
||||
PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
|
||||
GetMaxTeamSize<typename PairStyle::device_type>(ff, list->inum, teamsize_max_for, teamsize_max_reduce);
|
||||
GetMaxTeamSize<typename PairStyle::device_type>(ff, inum, teamsize_max_for, teamsize_max_reduce);
|
||||
}
|
||||
|
||||
int teamsize_max = teamsize_max_for;
|
||||
@ -979,7 +981,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
|
||||
atoms_per_team = 1;
|
||||
#endif
|
||||
|
||||
const int inum = list->inum;
|
||||
const int num_teams = inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0);
|
||||
|
||||
if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
@ -996,13 +997,13 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
|
||||
} else {
|
||||
if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(inum,ff,ev);
|
||||
else Kokkos::parallel_for(inum,ff);
|
||||
ff.contribute();
|
||||
} else {
|
||||
PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
|
||||
else Kokkos::parallel_for(list->inum,ff);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(inum,ff,ev);
|
||||
else Kokkos::parallel_for(inum,ff);
|
||||
ff.contribute();
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user