Files
lammps/src/INTEL/npair_full_bin_intel.cpp
2025-01-31 17:49:24 -05:00

135 lines
4.8 KiB
C++

// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_full_bin_intel.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "neigh_list.h"
#include "neighbor.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NPairFullBinIntel::NPairFullBinIntel(LAMMPS *lmp) : NPairIntel(lmp) {}
/* ----------------------------------------------------------------------
binned neighbor list construction for all neighbors
every neighbor pair appears in list of both atoms i and j
------------------------------------------------------------------------- */
void NPairFullBinIntel::build(NeighList *list)
{
if (nstencil > INTEL_MAX_STENCIL_CHECK)
error->all(FLERR, "Too many neighbor bins for INTEL package" + utils::errorurl(9));
#ifdef _LMP_INTEL_OFFLOAD
if (exclude)
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
#endif
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
fbi(list, _fix->get_mixed_buffers());
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
fbi(list, _fix->get_double_buffers());
else
fbi(list, _fix->get_single_buffers());
_fix->stop_watch(TIME_HOST_NEIGHBOR);
}
template <class flt_t, class acc_t>
void NPairFullBinIntel::
fbi(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
list->inum = nlocal;
list->gnum = 0;
int host_start = _fix->host_start_neighbor();
const int off_end = _fix->offload_end_neighbor();
#ifdef _LMP_INTEL_OFFLOAD
if (off_end) grow_stencil();
if (_fix->full_host_list()) host_start = 0;
int offload_noghost = _fix->offload_noghost();
#endif
buffers->grow_list(list, atom->nlocal, comm->nthreads,
_fix->three_body_neighbor(), off_end,
_fix->nbor_pack_width());
int need_ic = 0;
if (atom->molecular != Atom::ATOMIC)
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
neighbor->cutneighmax);
#ifdef _LMP_INTEL_OFFLOAD
if (_fix->three_body_neighbor()) {
if (need_ic) {
if (offload_noghost) {
bin_newton<flt_t,acc_t,1,1,1,0,1>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,1,1,1,0,1>(0, list, buffers, host_start, nlocal, off_end);
} else {
bin_newton<flt_t,acc_t,0,1,1,0,1>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal);
}
} else {
if (offload_noghost) {
bin_newton<flt_t,acc_t,1,0,1,0,1>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,1,0,1,0,1>(0, list, buffers, host_start, nlocal, off_end);
} else {
bin_newton<flt_t,acc_t,0,0,1,0,1>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal);
}
}
} else {
if (need_ic) {
if (offload_noghost) {
bin_newton<flt_t,acc_t,1,1,1,0,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,1,1,1,0,0>(0, list, buffers, host_start, nlocal, off_end);
} else {
bin_newton<flt_t,acc_t,0,1,1,0,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal);
}
} else {
if (offload_noghost) {
bin_newton<flt_t,acc_t,1,0,1,0,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,1,0,1,0,0>(0, list, buffers, host_start, nlocal, off_end);
} else {
bin_newton<flt_t,acc_t,0,0,1,0,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal);
}
}
}
#else
if (_fix->three_body_neighbor()) {
if (need_ic)
bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal);
else
bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal);
} else {
if (need_ic)
bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal);
else
bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal);
}
#endif
}