Reorganizing intel npair to work with cmake, adding intel stencil

This commit is contained in:
jtclemm
2023-03-28 13:55:12 -06:00
parent 781eb934c1
commit b456beb62f
20 changed files with 797 additions and 882 deletions

View File

@ -16,7 +16,7 @@
Contributing authors: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_full_bin_ghost_intel.h"
#include "npair_bin_ghost_intel.h"
#include "atom.h"
#include "comm.h"

View File

@ -25,8 +25,8 @@ NPairStyle(full/bin/ghost/intel,
// clang-format on
#else
#ifndef LMP_NPAIR_FULL_BIN_GHOST_INTEL_H
#define LMP_NPAIR_FULL_BIN_GHOST_INTEL_H
#ifndef LMP_NPAIR_BIN_GHOST_INTEL_H
#define LMP_NPAIR_BIN_GHOST_INTEL_H
#include "npair_intel.h"

View File

@ -0,0 +1,298 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_bin_intel.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "neigh_list.h"
#include "neighbor.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Construct the style; the body is empty, all initialization is done by the
// NPairIntel base-class constructor.
NPairHalfBinNewtonIntel::NPairHalfBinNewtonIntel(LAMMPS *lmp)
  : NPairIntel(lmp)
{
}
/* ----------------------------------------------------------------------
   binned neighbor list build with Newton's 3rd law fully applied
   owned atom i scans its own bin plus the other bins of the Newton stencil
   each pair is stored exactly once by some processor
------------------------------------------------------------------------- */

void NPairHalfBinNewtonIntel::build(NeighList *list)
{
  // half of the stencil size must stay within INTEL_MAX_STENCIL_CHECK
  if (nstencil / 2 > INTEL_MAX_STENCIL_CHECK) {
    error->all(FLERR, "Too many neighbor bins for INTEL package.");
  }

#ifdef _LMP_INTEL_OFFLOAD
  if (exclude) {
    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
  }
#endif

  // hand the buffer set matching the fix's precision mode to the worker;
  // any mode other than mixed or double uses the single-precision buffers
  switch (_fix->precision()) {
    case FixIntel::PREC_MODE_MIXED:
      hbni(list, _fix->get_mixed_buffers());
      break;
    case FixIntel::PREC_MODE_DOUBLE:
      hbni(list, _fix->get_double_buffers());
      break;
    default:
      hbni(list, _fix->get_single_buffers());
      break;
  }

  _fix->stop_watch(TIME_HOST_NEIGHBOR);
}
// Precision-templated worker called from build(): sets list->inum, grows the
// list buffers and calls the bin_newton<> instantiation selected at run time.
// bin_newton is declared outside this file; as used here its first integer
// template argument follows offload_noghost, the second follows need_ic and
// the remaining three are fixed at 0.
template <class flt_t, class acc_t>
void NPairHalfBinNewtonIntel::
hbni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
// nlocal is atom->nfirst when includegroup is set, otherwise atom->nlocal
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
list->inum = nlocal;
int host_start = _fix->host_start_neighbor();
const int off_end = _fix->offload_end_neighbor();
#ifdef _LMP_INTEL_OFFLOAD
// offload builds only: grow the stencil when off_end is nonzero and restart
// the host range at 0 when the fix reports a full host list
if (off_end) grow_stencil();
if (_fix->full_host_list()) host_start = 0;
int offload_noghost = _fix->offload_noghost();
#endif
buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
// need_ic stays 0 for atomic systems; otherwise dminimum_image_check
// (defined elsewhere) presumably sets it from the cutoff -- TODO confirm
int need_ic = 0;
if (atom->molecular != Atom::ATOMIC)
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
neighbor->cutneighmax);
#ifdef _LMP_INTEL_OFFLOAD
// two calls per case: leading argument 1 over [0, off_end), then leading
// argument 0 over [host_start, nlocal); the noghost variants also pass
// off_end to the second call
if (need_ic) {
if (offload_noghost) {
bin_newton<flt_t,acc_t,1,1,0,0,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,1,1,0,0,0>(0, list, buffers, host_start, nlocal,
off_end);
} else {
bin_newton<flt_t,acc_t,0,1,0,0,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal);
}
} else {
if (offload_noghost) {
bin_newton<flt_t,acc_t,1,0,0,0,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,1,0,0,0,0>(0, list, buffers, host_start, nlocal,
off_end);
} else {
bin_newton<flt_t,acc_t,0,0,0,0,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal);
}
}
#else
// non-offload build: a single call over [host_start, nlocal)
if (need_ic)
bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal);
else
bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal);
#endif
}
/* ---------------------------------------------------------------------- */
// Construct the style; the body is empty, all initialization is done by the
// NPairIntel base-class constructor.
NPairHalfBinNewtonTriIntel::NPairHalfBinNewtonTriIntel(LAMMPS *lmp)
  : NPairIntel(lmp)
{
}
/* ----------------------------------------------------------------------
   binned neighbor list build with Newton's 3rd law for triclinic boxes
   owned atom i scans its own bin plus the other bins of the triclinic stencil
   each pair is stored exactly once by some processor
------------------------------------------------------------------------- */

void NPairHalfBinNewtonTriIntel::build(NeighList *list)
{
  // the whole stencil must fit within INTEL_MAX_STENCIL
  if (nstencil > INTEL_MAX_STENCIL) {
    error->all(FLERR, "Too many neighbor bins for INTEL package.");
  }

#ifdef _LMP_INTEL_OFFLOAD
  if (exclude) {
    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
  }
#endif

  // hand the buffer set matching the fix's precision mode to the worker;
  // any mode other than mixed or double uses the single-precision buffers
  switch (_fix->precision()) {
    case FixIntel::PREC_MODE_MIXED:
      hbnti(list, _fix->get_mixed_buffers());
      break;
    case FixIntel::PREC_MODE_DOUBLE:
      hbnti(list, _fix->get_double_buffers());
      break;
    default:
      hbnti(list, _fix->get_single_buffers());
      break;
  }

  _fix->stop_watch(TIME_HOST_NEIGHBOR);
}
// Precision-templated worker called from build(): sets list->inum, grows the
// list buffers and calls the bin_newton<> instantiation selected at run time.
// bin_newton is declared outside this file; as used here its first integer
// template argument follows offload_noghost, the second follows need_ic, the
// fourth is fixed at 1 and the third and fifth are fixed at 0.
template <class flt_t, class acc_t>
void NPairHalfBinNewtonTriIntel::
hbnti(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
// nlocal is atom->nfirst when includegroup is set, otherwise atom->nlocal
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
list->inum = nlocal;
int host_start = _fix->host_start_neighbor();
const int off_end = _fix->offload_end_neighbor();
#ifdef _LMP_INTEL_OFFLOAD
// offload builds only: grow the stencil when off_end is nonzero and restart
// the host range at 0 when the fix reports a full host list
if (off_end) grow_stencil();
if (_fix->full_host_list()) host_start = 0;
int offload_noghost = _fix->offload_noghost();
#endif
buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
// need_ic stays 0 for atomic systems; otherwise dminimum_image_check
// (defined elsewhere) presumably sets it from the cutoff -- TODO confirm
int need_ic = 0;
if (atom->molecular != Atom::ATOMIC)
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
neighbor->cutneighmax);
#ifdef _LMP_INTEL_OFFLOAD
// two calls per case: leading argument 1 over [0, off_end), then leading
// argument 0 over [host_start, nlocal); the noghost variants also pass
// off_end to the second call
if (need_ic) {
if (offload_noghost) {
bin_newton<flt_t,acc_t,1,1,0,1,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,1,1,0,1,0>(0, list, buffers, host_start, nlocal,
off_end);
} else {
bin_newton<flt_t,acc_t,0,1,0,1,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal);
}
} else {
if (offload_noghost) {
bin_newton<flt_t,acc_t,1,0,0,1,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,1,0,0,1,0>(0, list, buffers, host_start, nlocal,
off_end);
} else {
bin_newton<flt_t,acc_t,0,0,0,1,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal);
}
}
#else
// non-offload build: a single call over [host_start, nlocal)
if (need_ic)
bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal);
else
bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal);
#endif
}
/* ---------------------------------------------------------------------- */
// Construct the style; the body is empty, all initialization is done by the
// NPairIntel base-class constructor.
NPairFullBinIntel::NPairFullBinIntel(LAMMPS *lmp)
  : NPairIntel(lmp)
{
}
/* ----------------------------------------------------------------------
   binned neighbor list build for all neighbors
   every neighbor pair shows up in the list of atom i and of atom j
------------------------------------------------------------------------- */

void NPairFullBinIntel::build(NeighList *list)
{
  // the whole stencil must fit within INTEL_MAX_STENCIL_CHECK
  if (nstencil > INTEL_MAX_STENCIL_CHECK) {
    error->all(FLERR, "Too many neighbor bins for INTEL package.");
  }

#ifdef _LMP_INTEL_OFFLOAD
  if (exclude) {
    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
  }
#endif

  // hand the buffer set matching the fix's precision mode to the worker;
  // any mode other than mixed or double uses the single-precision buffers
  switch (_fix->precision()) {
    case FixIntel::PREC_MODE_MIXED:
      fbi(list, _fix->get_mixed_buffers());
      break;
    case FixIntel::PREC_MODE_DOUBLE:
      fbi(list, _fix->get_double_buffers());
      break;
    default:
      fbi(list, _fix->get_single_buffers());
      break;
  }

  _fix->stop_watch(TIME_HOST_NEIGHBOR);
}
// Precision-templated worker called from build(): sets list->inum and
// list->gnum, grows the list buffers and calls the bin_newton<> instantiation
// selected at run time.
// bin_newton is declared outside this file; as used here its first integer
// template argument follows offload_noghost, the second follows need_ic, the
// third is fixed at 1, the fourth at 0 and the fifth follows
// _fix->three_body_neighbor().
template <class flt_t, class acc_t>
void NPairFullBinIntel::
fbi(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
  // nlocal is atom->nfirst when includegroup is set, otherwise atom->nlocal
  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  list->inum = nlocal;
  list->gnum = 0;

  int host_start = _fix->host_start_neighbor();
  const int off_end = _fix->offload_end_neighbor();

#ifdef _LMP_INTEL_OFFLOAD
  // offload builds only: grow the stencil when off_end is nonzero and restart
  // the host range at 0 when the fix reports a full host list
  if (off_end) grow_stencil();
  if (_fix->full_host_list()) host_start = 0;
  int offload_noghost = _fix->offload_noghost();
#endif

  buffers->grow_list(list, atom->nlocal, comm->nthreads,
                     _fix->three_body_neighbor(), off_end,
                     _fix->nbor_pack_width());

  // need_ic stays 0 for atomic systems; otherwise dminimum_image_check
  // (defined elsewhere) presumably sets it from the cutoff -- TODO confirm
  int need_ic = 0;
  if (atom->molecular != Atom::ATOMIC)
    dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
                         neighbor->cutneighmax);

#ifdef _LMP_INTEL_OFFLOAD
  // two calls per case: leading argument 1 over [0, off_end), then leading
  // argument 0 over [host_start, nlocal); the noghost variants also pass
  // off_end to the second call
  if (_fix->three_body_neighbor()) {
    if (need_ic) {
      if (offload_noghost) {
        bin_newton<flt_t,acc_t,1,1,1,0,1>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,1,1,1,0,1>(0, list, buffers, host_start, nlocal, off_end);
      } else {
        bin_newton<flt_t,acc_t,0,1,1,0,1>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal);
      }
    } else {
      if (offload_noghost) {
        bin_newton<flt_t,acc_t,1,0,1,0,1>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,1,0,1,0,1>(0, list, buffers, host_start, nlocal, off_end);
      } else {
        bin_newton<flt_t,acc_t,0,0,1,0,1>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal);
      }
    }
  } else {
    if (need_ic) {
      if (offload_noghost) {
        bin_newton<flt_t,acc_t,1,1,1,0,0>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,1,1,1,0,0>(0, list, buffers, host_start, nlocal, off_end);
      } else {
        bin_newton<flt_t,acc_t,0,1,1,0,0>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal);
      }
    } else {
      if (offload_noghost) {
        bin_newton<flt_t,acc_t,1,0,1,0,0>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,1,0,1,0,0>(0, list, buffers, host_start, nlocal, off_end);
      } else {
        bin_newton<flt_t,acc_t,0,0,1,0,0>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal);
      }
    }
  }
#else
  // non-offload build: a single call over [host_start, nlocal)
  if (_fix->three_body_neighbor()) {
    if (need_ic)
      bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal);
    else
      bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal);
  } else {
    if (need_ic)
      bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal);
    else
      bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal);
  }
#endif
}

View File

@ -14,20 +14,38 @@
#ifdef NPAIR_CLASS
// clang-format off
// Each NPairStyle entry (macro defined elsewhere) lists a style name, the
// class declared below that implements it, and an OR of NP_* flags.
// half/bin/newton/intel: flagged NP_ORTHO
NPairStyle(half/bin/newton/intel,
NPairHalfBinNewtonIntel,
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_INTEL);
// half/bin/newton/tri/intel: flagged NP_TRI
NPairStyle(half/bin/newton/tri/intel,
NPairHalfBinNewtonTriIntel,
NP_HALF | NP_BIN | NP_NEWTON | NP_TRI | NP_INTEL);
// full/bin/intel: flagged for both Newton settings and both box types
NPairStyle(full/bin/intel,
NPairFullBinIntel,
NP_FULL | NP_BIN | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI |
NP_INTEL);
// clang-format on
#else
#ifndef LMP_NPAIR_HALF_BIN_NEWTON_INTEL_TRI_H
#define LMP_NPAIR_HALF_BIN_NEWTON_INTEL_TRI_H
#ifndef LMP_NPAIR_BIN_INTEL_H
#define LMP_NPAIR_BIN_INTEL_H
#include "fix_intel.h"
#include "npair_intel.h"
namespace LAMMPS_NS {
// Class named by the half/bin/newton/intel NPairStyle entry in this header.
class NPairHalfBinNewtonIntel : public NPairIntel {
public:
NPairHalfBinNewtonIntel(class LAMMPS *);
// overrides the virtual build() inherited through NPairIntel
void build(class NeighList *) override;
private:
// worker templated on the value types of the IntelBuffers it receives
template <class flt_t, class acc_t> void hbni(NeighList *, IntelBuffers<flt_t, acc_t> *);
};
class NPairHalfBinNewtonTriIntel : public NPairIntel {
public:
NPairHalfBinNewtonTriIntel(class LAMMPS *);
@ -37,6 +55,15 @@ class NPairHalfBinNewtonTriIntel : public NPairIntel {
template <class flt_t, class acc_t> void hbnti(NeighList *, IntelBuffers<flt_t, acc_t> *);
};
// Class named by the full/bin/intel NPairStyle entry in this header.
class NPairFullBinIntel : public NPairIntel {
public:
NPairFullBinIntel(class LAMMPS *);
// overrides the virtual build() inherited through NPairIntel
void build(class NeighList *) override;
private:
// worker templated on the value types of the IntelBuffers it receives
template <class flt_t, class acc_t> void fbi(NeighList *, IntelBuffers<flt_t, acc_t> *);
};
} // namespace LAMMPS_NS
#endif

View File

@ -1,134 +0,0 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_full_bin_intel.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "neigh_list.h"
#include "neighbor.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Construct the style; the body is empty, all initialization is done by the
// NPairIntel base-class constructor.
NPairFullBinIntel::NPairFullBinIntel(LAMMPS *lmp)
  : NPairIntel(lmp)
{
}
/* ----------------------------------------------------------------------
   binned neighbor list build for all neighbors
   every neighbor pair shows up in the list of atom i and of atom j
------------------------------------------------------------------------- */

void NPairFullBinIntel::build(NeighList *list)
{
  // the whole stencil must fit within INTEL_MAX_STENCIL_CHECK
  if (nstencil > INTEL_MAX_STENCIL_CHECK) {
    error->all(FLERR, "Too many neighbor bins for INTEL package.");
  }

#ifdef _LMP_INTEL_OFFLOAD
  if (exclude) {
    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
  }
#endif

  // hand the buffer set matching the fix's precision mode to the worker;
  // any mode other than mixed or double uses the single-precision buffers
  switch (_fix->precision()) {
    case FixIntel::PREC_MODE_MIXED:
      fbi(list, _fix->get_mixed_buffers());
      break;
    case FixIntel::PREC_MODE_DOUBLE:
      fbi(list, _fix->get_double_buffers());
      break;
    default:
      fbi(list, _fix->get_single_buffers());
      break;
  }

  _fix->stop_watch(TIME_HOST_NEIGHBOR);
}
// Precision-templated worker called from build(): sets list->inum and
// list->gnum, grows the list buffers and calls the bin_newton<> instantiation
// selected at run time.
// bin_newton is declared outside this file; as used here its first integer
// template argument follows offload_noghost, the second follows need_ic, the
// third is fixed at 1, the fourth at 0 and the fifth follows
// _fix->three_body_neighbor().
template <class flt_t, class acc_t>
void NPairFullBinIntel::
fbi(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
  // nlocal is atom->nfirst when includegroup is set, otherwise atom->nlocal
  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  list->inum = nlocal;
  list->gnum = 0;

  int host_start = _fix->host_start_neighbor();
  const int off_end = _fix->offload_end_neighbor();

#ifdef _LMP_INTEL_OFFLOAD
  // offload builds only: grow the stencil when off_end is nonzero and restart
  // the host range at 0 when the fix reports a full host list
  if (off_end) grow_stencil();
  if (_fix->full_host_list()) host_start = 0;
  int offload_noghost = _fix->offload_noghost();
#endif

  buffers->grow_list(list, atom->nlocal, comm->nthreads,
                     _fix->three_body_neighbor(), off_end,
                     _fix->nbor_pack_width());

  // need_ic stays 0 for atomic systems; otherwise dminimum_image_check
  // (defined elsewhere) presumably sets it from the cutoff -- TODO confirm
  int need_ic = 0;
  if (atom->molecular != Atom::ATOMIC)
    dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
                         neighbor->cutneighmax);

#ifdef _LMP_INTEL_OFFLOAD
  // two calls per case: leading argument 1 over [0, off_end), then leading
  // argument 0 over [host_start, nlocal); the noghost variants also pass
  // off_end to the second call
  if (_fix->three_body_neighbor()) {
    if (need_ic) {
      if (offload_noghost) {
        bin_newton<flt_t,acc_t,1,1,1,0,1>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,1,1,1,0,1>(0, list, buffers, host_start, nlocal, off_end);
      } else {
        bin_newton<flt_t,acc_t,0,1,1,0,1>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal);
      }
    } else {
      if (offload_noghost) {
        bin_newton<flt_t,acc_t,1,0,1,0,1>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,1,0,1,0,1>(0, list, buffers, host_start, nlocal, off_end);
      } else {
        bin_newton<flt_t,acc_t,0,0,1,0,1>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal);
      }
    }
  } else {
    if (need_ic) {
      if (offload_noghost) {
        bin_newton<flt_t,acc_t,1,1,1,0,0>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,1,1,1,0,0>(0, list, buffers, host_start, nlocal, off_end);
      } else {
        bin_newton<flt_t,acc_t,0,1,1,0,0>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal);
      }
    } else {
      if (offload_noghost) {
        bin_newton<flt_t,acc_t,1,0,1,0,0>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,1,0,1,0,0>(0, list, buffers, host_start, nlocal, off_end);
      } else {
        bin_newton<flt_t,acc_t,0,0,1,0,0>(1, list, buffers, 0, off_end);
        bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal);
      }
    }
  }
#else
  // non-offload build: a single call over [host_start, nlocal)
  if (_fix->three_body_neighbor()) {
    if (need_ic)
      bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal);
    else
      bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal);
  } else {
    if (need_ic)
      bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal);
    else
      bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal);
  }
#endif
}

View File

@ -1,44 +0,0 @@
// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
// clang-format off
// NPairStyle entry (macro defined elsewhere): style name, the class declared
// below that implements it, and an OR of NP_* flags covering both Newton
// settings and both box types.
NPairStyle(full/bin/intel,
NPairFullBinIntel,
NP_FULL | NP_BIN | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI |
NP_INTEL);
// clang-format on
#else
#ifndef LMP_NPAIR_FULL_BIN_INTEL_H
#define LMP_NPAIR_FULL_BIN_INTEL_H
#include "fix_intel.h"
#include "npair_intel.h"
namespace LAMMPS_NS {
// Class named by the full/bin/intel NPairStyle entry in this header.
class NPairFullBinIntel : public NPairIntel {
public:
NPairFullBinIntel(class LAMMPS *);
// overrides the virtual build() inherited through NPairIntel
void build(class NeighList *) override;
private:
// worker templated on the value types of the IntelBuffers it receives
template <class flt_t, class acc_t> void fbi(NeighList *, IntelBuffers<flt_t, acc_t> *);
};
} // namespace LAMMPS_NS
#endif
#endif

View File

@ -1,108 +0,0 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_half_bin_newton_intel.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "neigh_list.h"
#include "neighbor.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Construct the style; the body is empty, all initialization is done by the
// NPairIntel base-class constructor.
NPairHalfBinNewtonIntel::NPairHalfBinNewtonIntel(LAMMPS *lmp)
  : NPairIntel(lmp)
{
}
/* ----------------------------------------------------------------------
   binned neighbor list build with Newton's 3rd law fully applied
   owned atom i scans its own bin plus the other bins of the Newton stencil
   each pair is stored exactly once by some processor
------------------------------------------------------------------------- */

void NPairHalfBinNewtonIntel::build(NeighList *list)
{
  // half of the stencil size must stay within INTEL_MAX_STENCIL_CHECK
  if (nstencil / 2 > INTEL_MAX_STENCIL_CHECK) {
    error->all(FLERR, "Too many neighbor bins for INTEL package.");
  }

#ifdef _LMP_INTEL_OFFLOAD
  if (exclude) {
    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
  }
#endif

  // hand the buffer set matching the fix's precision mode to the worker;
  // any mode other than mixed or double uses the single-precision buffers
  switch (_fix->precision()) {
    case FixIntel::PREC_MODE_MIXED:
      hbni(list, _fix->get_mixed_buffers());
      break;
    case FixIntel::PREC_MODE_DOUBLE:
      hbni(list, _fix->get_double_buffers());
      break;
    default:
      hbni(list, _fix->get_single_buffers());
      break;
  }

  _fix->stop_watch(TIME_HOST_NEIGHBOR);
}
// Precision-templated worker called from build(): sets list->inum, grows the
// list buffers and calls the bin_newton<> instantiation selected at run time.
// bin_newton is declared outside this file; as used here its first integer
// template argument follows offload_noghost, the second follows need_ic and
// the remaining three are fixed at 0.
template <class flt_t, class acc_t>
void NPairHalfBinNewtonIntel::
hbni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
// nlocal is atom->nfirst when includegroup is set, otherwise atom->nlocal
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
list->inum = nlocal;
int host_start = _fix->host_start_neighbor();
const int off_end = _fix->offload_end_neighbor();
#ifdef _LMP_INTEL_OFFLOAD
// offload builds only: grow the stencil when off_end is nonzero and restart
// the host range at 0 when the fix reports a full host list
if (off_end) grow_stencil();
if (_fix->full_host_list()) host_start = 0;
int offload_noghost = _fix->offload_noghost();
#endif
buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
// need_ic stays 0 for atomic systems; otherwise dminimum_image_check
// (defined elsewhere) presumably sets it from the cutoff -- TODO confirm
int need_ic = 0;
if (atom->molecular != Atom::ATOMIC)
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
neighbor->cutneighmax);
#ifdef _LMP_INTEL_OFFLOAD
// two calls per case: leading argument 1 over [0, off_end), then leading
// argument 0 over [host_start, nlocal); the noghost variants also pass
// off_end to the second call
if (need_ic) {
if (offload_noghost) {
bin_newton<flt_t,acc_t,1,1,0,0,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,1,1,0,0,0>(0, list, buffers, host_start, nlocal,
off_end);
} else {
bin_newton<flt_t,acc_t,0,1,0,0,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal);
}
} else {
if (offload_noghost) {
bin_newton<flt_t,acc_t,1,0,0,0,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,1,0,0,0,0>(0, list, buffers, host_start, nlocal,
off_end);
} else {
bin_newton<flt_t,acc_t,0,0,0,0,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal);
}
}
#else
// non-offload build: a single call over [host_start, nlocal)
if (need_ic)
bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal);
else
bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal);
#endif
}

View File

@ -1,43 +0,0 @@
// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
// clang-format off
// NPairStyle entry (macro defined elsewhere): style name, the class declared
// below that implements it, and an OR of NP_* flags (NP_ORTHO only, no NP_TRI).
NPairStyle(half/bin/newton/intel,
NPairHalfBinNewtonIntel,
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_INTEL);
// clang-format on
#else
#ifndef LMP_NPAIR_HALF_BIN_NEWTON_INTEL_H
#define LMP_NPAIR_HALF_BIN_NEWTON_INTEL_H
#include "fix_intel.h"
#include "npair_intel.h"
namespace LAMMPS_NS {
// Class named by the half/bin/newton/intel NPairStyle entry in this header.
class NPairHalfBinNewtonIntel : public NPairIntel {
public:
NPairHalfBinNewtonIntel(class LAMMPS *);
// overrides the virtual build() inherited through NPairIntel
void build(class NeighList *) override;
private:
// worker templated on the value types of the IntelBuffers it receives
template <class flt_t, class acc_t> void hbni(NeighList *, IntelBuffers<flt_t, acc_t> *);
};
} // namespace LAMMPS_NS
#endif
#endif

View File

@ -1,108 +0,0 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_half_bin_newton_tri_intel.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "neigh_list.h"
#include "neighbor.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Construct the style; the body is empty, all initialization is done by the
// NPairIntel base-class constructor.
NPairHalfBinNewtonTriIntel::NPairHalfBinNewtonTriIntel(LAMMPS *lmp)
  : NPairIntel(lmp)
{
}
/* ----------------------------------------------------------------------
   binned neighbor list build with Newton's 3rd law for triclinic boxes
   owned atom i scans its own bin plus the other bins of the triclinic stencil
   each pair is stored exactly once by some processor
------------------------------------------------------------------------- */

void NPairHalfBinNewtonTriIntel::build(NeighList *list)
{
  // the whole stencil must fit within INTEL_MAX_STENCIL
  if (nstencil > INTEL_MAX_STENCIL) {
    error->all(FLERR, "Too many neighbor bins for INTEL package.");
  }

#ifdef _LMP_INTEL_OFFLOAD
  if (exclude) {
    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
  }
#endif

  // hand the buffer set matching the fix's precision mode to the worker;
  // any mode other than mixed or double uses the single-precision buffers
  switch (_fix->precision()) {
    case FixIntel::PREC_MODE_MIXED:
      hbnti(list, _fix->get_mixed_buffers());
      break;
    case FixIntel::PREC_MODE_DOUBLE:
      hbnti(list, _fix->get_double_buffers());
      break;
    default:
      hbnti(list, _fix->get_single_buffers());
      break;
  }

  _fix->stop_watch(TIME_HOST_NEIGHBOR);
}
// Precision-templated worker called from build(): sets list->inum, grows the
// list buffers and calls the bin_newton<> instantiation selected at run time.
// bin_newton is declared outside this file; as used here its first integer
// template argument follows offload_noghost, the second follows need_ic, the
// fourth is fixed at 1 and the third and fifth are fixed at 0.
template <class flt_t, class acc_t>
void NPairHalfBinNewtonTriIntel::
hbnti(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
// nlocal is atom->nfirst when includegroup is set, otherwise atom->nlocal
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
list->inum = nlocal;
int host_start = _fix->host_start_neighbor();
const int off_end = _fix->offload_end_neighbor();
#ifdef _LMP_INTEL_OFFLOAD
// offload builds only: grow the stencil when off_end is nonzero and restart
// the host range at 0 when the fix reports a full host list
if (off_end) grow_stencil();
if (_fix->full_host_list()) host_start = 0;
int offload_noghost = _fix->offload_noghost();
#endif
buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
// need_ic stays 0 for atomic systems; otherwise dminimum_image_check
// (defined elsewhere) presumably sets it from the cutoff -- TODO confirm
int need_ic = 0;
if (atom->molecular != Atom::ATOMIC)
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
neighbor->cutneighmax);
#ifdef _LMP_INTEL_OFFLOAD
// two calls per case: leading argument 1 over [0, off_end), then leading
// argument 0 over [host_start, nlocal); the noghost variants also pass
// off_end to the second call
if (need_ic) {
if (offload_noghost) {
bin_newton<flt_t,acc_t,1,1,0,1,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,1,1,0,1,0>(0, list, buffers, host_start, nlocal,
off_end);
} else {
bin_newton<flt_t,acc_t,0,1,0,1,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal);
}
} else {
if (offload_noghost) {
bin_newton<flt_t,acc_t,1,0,0,1,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,1,0,0,1,0>(0, list, buffers, host_start, nlocal,
off_end);
} else {
bin_newton<flt_t,acc_t,0,0,0,1,0>(1, list, buffers, 0, off_end);
bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal);
}
}
#else
// non-offload build: a single call over [host_start, nlocal)
if (need_ic)
bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal);
else
bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal);
#endif
}

View File

@ -13,10 +13,10 @@
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (SNL)
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_halffull_newton_trim_intel.h"
#include "npair_halffull_intel.h"
#include "atom.h"
#include "comm.h"
@ -29,6 +29,204 @@ using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Look up the fix with id "package_intel" and keep it in _fix; abort with an
// error if no such fix exists.
NPairHalffullNewtonIntel::NPairHalffullNewtonIntel(LAMMPS *lmp)
  : NPair(lmp)
{
  _fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));

  if (_fix == nullptr) {
    error->all(FLERR, "The 'package intel' command is required for /intel styles");
  }
}
/* ----------------------------------------------------------------------
build half list from full list
pair stored once if i,j are both owned and i < j
if j is ghost, only store if j coords are "above and to the right" of i
works if full list is a skip list
list = half list to fill; its parent full list is list->listfull
buffers = supplies the atom coordinates via get_x()
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void NPairHalffullNewtonIntel::build_t(NeighList *list,
IntelBuffers<flt_t,acc_t> *buffers)
{
const int inum_full = list->listfull->inum;
const int nlocal = atom->nlocal;
// e_nall = owned + ghost atom count; used below as the padding value
const int e_nall = nlocal + atom->nghost;
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const ilist = list->ilist;
int * _noalias const numneigh = list->numneigh;
int ** _noalias const firstneigh = list->firstneigh;
const int * _noalias const ilist_full = list->listfull->ilist;
const int * _noalias const numneigh_full = list->listfull->numneigh;
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
#if defined(_OPENMP)
#pragma omp parallel
#endif
{
// IP_PRE_omp_range_id is a macro defined elsewhere; presumably it assigns
// this thread's id and its [ifrom, ito) share of the full list -- TODO confirm
int tid, ifrom, ito;
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, comm->nthreads);
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over parent full list
for (int ii = ifrom; ii < ito; ii++) {
int n = 0;
int *neighptr = ipage.vget();
const int i = ilist_full[ii];
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
// loop over full neighbor list
const int * _noalias const jlist = firstneigh_full[i];
const int jnum = numneigh_full[i];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
// joriginal keeps the bits masked off by NEIGHMASK; j is the plain index
const int joriginal = jlist[jj];
const int j = joriginal & NEIGHMASK;
int addme = 1;
if (j < nlocal) {
// owned neighbor: drop the pair when i > j
if (i > j) addme = 0;
} else {
// ghost neighbor: drop the pair when j's coordinates compare lower
// than i's, testing z first, then y, then x
if (x[j].z < ztmp) addme = 0;
if (x[j].z == ztmp) {
if (x[j].y < ytmp) addme = 0;
if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
}
}
if (addme)
neighptr[n++] = joriginal;
}
ilist[ii] = i;
firstneigh[i] = neighptr;
// numneigh records the count before the padding entries are appended
numneigh[i] = n;
// IP_PRE_neighbor_pad is a macro defined elsewhere; presumably it rounds
// pad_end up to an alignment boundary -- TODO confirm
int pad_end = n;
IP_PRE_neighbor_pad(pad_end, 0);
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
avg=INTEL_COMPILE_WIDTH/2
#endif
for ( ; n < pad_end; n++)
neighptr[n] = e_nall;
// n now includes the padding entries written above
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
}
list->inum = inum_full;
}
/* ----------------------------------------------------------------------
build half list from full 3-body list
half list is already stored as first part of 3-body list
list = half list to fill; its parent full list is list->listfull
numhalf = number of leading entries to copy, indexed by position ii
in the full list's ilist
flt_t is not referenced in the function body
------------------------------------------------------------------------- */
template <class flt_t>
void NPairHalffullNewtonIntel::build_t3(NeighList *list, int *numhalf)
{
const int inum_full = list->listfull->inum;
// e_nall = owned + ghost atom count; used below as the padding value
const int e_nall = atom->nlocal + atom->nghost;
int * _noalias const ilist = list->ilist;
int * _noalias const numneigh = list->numneigh;
int ** _noalias const firstneigh = list->firstneigh;
const int * _noalias const ilist_full = list->listfull->ilist;
const int * _noalias const numneigh_full = numhalf;
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
// use a single thread unless comm->nthreads exceeds INTEL_HTHREADS
int packthreads = 1;
if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads;
#if defined(_OPENMP)
#pragma omp parallel if (packthreads > 1)
#endif
{
// IP_PRE_omp_range_id is a macro defined elsewhere; presumably it assigns
// this thread's id and its [ifrom, ito) share of the full list -- TODO confirm
int tid, ifrom, ito;
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, packthreads);
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over parent full list
for (int ii = ifrom; ii < ito; ii++) {
int n = 0;
int *neighptr = ipage.vget();
const int i = ilist_full[ii];
// loop over full neighbor list
// note: the neighbor pointer is indexed by atom i, the count by position ii
const int * _noalias const jlist = firstneigh_full[i];
const int jnum = numneigh_full[ii];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int joriginal = jlist[jj];
neighptr[n++] = joriginal;
}
ilist[ii] = i;
firstneigh[i] = neighptr;
// numneigh records the count before the padding entries are appended
numneigh[i] = n;
// IP_PRE_neighbor_pad is a macro defined elsewhere; presumably it rounds
// pad_end up to an alignment boundary -- TODO confirm
int pad_end = n;
IP_PRE_neighbor_pad(pad_end, 0);
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
avg=INTEL_COMPILE_WIDTH/2
#endif
for ( ; n < pad_end; n++)
neighptr[n] = e_nall;
// n now includes the padding entries written above
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
}
list->inum = inum_full;
}
/* ---------------------------------------------------------------------- */
// Entry point: when the fix reports no 3-body neighbor list, build_t() is
// called with the buffers of the active precision mode; otherwise build_t3()
// is called with the nhalf array obtained from get_list_data3() on those
// buffers. Any mode other than mixed or double uses the single buffers.
void NPairHalffullNewtonIntel::build(NeighList *list)
{
  const bool three_body = (_fix->three_body_neighbor() != 0);
  const auto mode = _fix->precision();

  if (!three_body) {
    switch (mode) {
      case FixIntel::PREC_MODE_MIXED:
        build_t(list, _fix->get_mixed_buffers());
        break;
      case FixIntel::PREC_MODE_DOUBLE:
        build_t(list, _fix->get_double_buffers());
        break;
      default:
        build_t(list, _fix->get_single_buffers());
        break;
    }
    return;
  }

  // cnum is filled by get_list_data3() as well but is not used here
  int *nhalf;
  int *cnum;

  switch (mode) {
    case FixIntel::PREC_MODE_MIXED:
      _fix->get_mixed_buffers()->get_list_data3(list->listfull, nhalf, cnum);
      build_t3<float>(list, nhalf);
      break;
    case FixIntel::PREC_MODE_DOUBLE:
      _fix->get_double_buffers()->get_list_data3(list->listfull, nhalf, cnum);
      build_t3<double>(list, nhalf);
      break;
    default:
      _fix->get_single_buffers()->get_list_data3(list->listfull, nhalf, cnum);
      build_t3<float>(list, nhalf);
      break;
  }
}
/* ---------------------------------------------------------------------- */
NPairHalffullNewtonTrimIntel::NPairHalffullNewtonTrimIntel(LAMMPS *lmp) : NPair(lmp) {
_fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");

View File

@ -0,0 +1,128 @@
// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
// The Newton-off styles are only used with hybrid to generate a list for a
// non-intel style, so they use the standard routines.
#ifdef NPAIR_CLASS
// clang-format off
// Each entry lists: style name, class used for that style, mask of NP_* flags.

// Newton on, no trim: NPairHalffullNewtonIntel (declared later in this header);
// the two entries differ only in NP_SKIP.
NPairStyle(halffull/newton/intel,
NPairHalffullNewtonIntel,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI| NP_INTEL);
NPairStyle(halffull/newton/skip/intel,
NPairHalffullNewtonIntel,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL);
// Newton off, no trim: all four entries use NPairHalffullNewtoff, which is not
// declared in this header; they differ only in NP_SKIP / NP_GHOST.
NPairStyle(halffull/newtoff/intel,
NPairHalffullNewtoff,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_INTEL);
NPairStyle(halffull/newtoff/skip/intel,
NPairHalffullNewtoff,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL);
NPairStyle(halffull/newtoff/ghost/intel,
NPairHalffullNewtoff,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_GHOST | NP_INTEL);
NPairStyle(halffull/newtoff/skip/ghost/intel,
NPairHalffullNewtoff,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_INTEL);
// Newton on, with NP_TRIM: NPairHalffullNewtonTrimIntel (declared later in
// this header); the two entries differ only in NP_SKIP.
NPairStyle(halffull/newton/trim/intel,
NPairHalffullNewtonTrimIntel,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI| NP_TRIM | NP_INTEL);
NPairStyle(halffull/newton/skip/trim/intel,
NPairHalffullNewtonTrimIntel,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
// Newton off, with NP_TRIM: all four entries use NPairHalffullNewtoffTrim,
// which is not declared in this header; they differ only in NP_SKIP / NP_GHOST.
NPairStyle(halffull/newtoff/trim/intel,
NPairHalffullNewtoffTrim,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_TRIM | NP_INTEL);
NPairStyle(halffull/newtoff/skip/trim/intel,
NPairHalffullNewtoffTrim,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
NPairStyle(halffull/newtoff/ghost/trim/intel,
NPairHalffullNewtoffTrim,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_INTEL);
NPairStyle(halffull/newtoff/skip/ghost/trim/intel,
NPairHalffullNewtoffTrim,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_TRIM | NP_INTEL);
// clang-format on
#else
#ifndef LMP_NPAIR_HALFFULL_INTEL_H
#define LMP_NPAIR_HALFFULL_INTEL_H
#include "fix_intel.h"
#include "npair.h"
#if defined(_OPENMP)
#include <omp.h>
#endif
namespace LAMMPS_NS {
// NPair subclass that builds a half neighbor list from a parent full list
// (list->listfull) for the Newton-on /intel styles.
class NPairHalffullNewtonIntel : public NPair {
public:
NPairHalffullNewtonIntel(class LAMMPS *);
// Dispatches to build_t() or build_t3() depending on the precision mode and
// three-body setting reported by _fix.
void build(class NeighList *) override;
protected:
// The fix with ID "package_intel"; supplies precision mode and buffers.
FixIntel *_fix;
// Regular build; the buffers argument provides the atom coordinates.
template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
// Three-body build; the int* argument is the array of half-list counts.
template <class flt_t> void build_t3(NeighList *, int *);
};
// NPair subclass registered for the halffull/newton/.../trim/intel styles.
// NOTE(review): presumably the trimming counterpart of NPairHalffullNewtonIntel;
// the method bodies are not part of this header - confirm in the .cpp file.
class NPairHalffullNewtonTrimIntel : public NPair {
public:
NPairHalffullNewtonTrimIntel(class LAMMPS *);
void build(class NeighList *) override;
protected:
// The fix with ID "package_intel" (looked up in the constructor).
FixIntel *_fix;
template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
// Unlike NPairHalffullNewtonIntel::build_t3, this overload also takes the buffers.
template <class flt_t, class acc_t> void build_t3(NeighList *, int *, IntelBuffers<flt_t, acc_t> *);
};
} // namespace LAMMPS_NS
#endif
#endif

View File

@ -1,44 +0,0 @@
// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
// Only used for hybrid to generate list for non-intel style. Use
// standard routines.
#ifdef NPAIR_CLASS
// clang-format off
// Each entry lists: style name, class used for that style, mask of NP_* flags.
// All four entries use NPairHalffullNewtoff; they differ only in the
// NP_SKIP / NP_GHOST bits of the mask.
NPairStyle(halffull/newtoff/intel,
NPairHalffullNewtoff,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_INTEL);
NPairStyle(halffull/newtoff/skip/intel,
NPairHalffullNewtoff,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL);
NPairStyle(halffull/newtoff/ghost/intel,
NPairHalffullNewtoff,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_GHOST | NP_INTEL);
NPairStyle(halffull/newtoff/skip/ghost/intel,
NPairHalffullNewtoff,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_INTEL);
// clang-format on
#endif

View File

@ -1,44 +0,0 @@
// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (SNL)
------------------------------------------------------------------------- */
// Only used for hybrid to generate list for non-intel style. Use
// standard routines.
#ifdef NPAIR_CLASS
// clang-format off
// Each entry lists: style name, class used for that style, mask of NP_* flags.
// All four entries use NPairHalffullNewtoffTrim and carry NP_TRIM; they differ
// only in the NP_SKIP / NP_GHOST bits of the mask.
NPairStyle(halffull/newtoff/trim/intel,
NPairHalffullNewtoffTrim,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_TRIM | NP_INTEL);
NPairStyle(halffull/newtoff/skip/trim/intel,
NPairHalffullNewtoffTrim,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
NPairStyle(halffull/newtoff/ghost/trim/intel,
NPairHalffullNewtoffTrim,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_INTEL);
NPairStyle(halffull/newtoff/skip/ghost/trim/intel,
NPairHalffullNewtoffTrim,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_TRIM | NP_INTEL);
// clang-format on
#endif

View File

@ -1,226 +0,0 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_halffull_newton_intel.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "modify.h"
#include "my_page.h"
#include "neigh_list.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NPairHalffullNewtonIntel::NPairHalffullNewtonIntel(LAMMPS *lmp) : NPair(lmp)
{
  // The /intel styles depend on the fix with ID "package_intel";
  // look it up once and abort with an error if it is not defined.
  auto *const intel_fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
  _fix = intel_fix;
  if (_fix == nullptr)
    error->all(FLERR, "The 'package intel' command is required for /intel styles");
}
/* ----------------------------------------------------------------------
build half list from full list
pair stored once if i,j are both owned and i < j
if j is ghost, only store if j coords are "above and to the right" of i
works if full list is a skip list
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void NPairHalffullNewtonIntel::build_t(NeighList *list,
IntelBuffers<flt_t,acc_t> *buffers)
{
// list    = half list to fill; its parent full list is list->listfull
// buffers = source of the atom coordinates (get_x()) used for the ghost test
const int inum_full = list->listfull->inum;
const int nlocal = atom->nlocal;
// index one past the last local+ghost atom; used as the padding value below
const int e_nall = nlocal + atom->nghost;
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const ilist = list->ilist;
int * _noalias const numneigh = list->numneigh;
int ** _noalias const firstneigh = list->firstneigh;
const int * _noalias const ilist_full = list->listfull->ilist;
const int * _noalias const numneigh_full = list->listfull->numneigh;
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
#if defined(_OPENMP)
#pragma omp parallel
#endif
{
int tid, ifrom, ito;
// NOTE(review): macro defined elsewhere; presumably assigns this thread its
// id (tid) and its slice [ifrom, ito) of the inum_full entries - confirm
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, comm->nthreads);
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over parent full list
for (int ii = ifrom; ii < ito; ii++) {
int n = 0;
int *neighptr = ipage.vget();
const int i = ilist_full[ii];
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
// loop over full neighbor list
// here both the neighbor pointer and the neighbor count are indexed by atom i
const int * _noalias const jlist = firstneigh_full[i];
const int jnum = numneigh_full[i];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int joriginal = jlist[jj];
// the masked index j is used for the tests; the unmasked joriginal is stored
const int j = joriginal & NEIGHMASK;
int addme = 1;
if (j < nlocal) {
// j is an owned atom: drop the pair when i > j
if (i > j) addme = 0;
} else {
// j is a ghost atom: drop the pair when j lies below i in z,
// or at equal z below in y, or at equal z and y below in x
if (x[j].z < ztmp) addme = 0;
if (x[j].z == ztmp) {
if (x[j].y < ytmp) addme = 0;
if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
}
}
if (addme)
neighptr[n++] = joriginal;
}
ilist[ii] = i;
firstneigh[i] = neighptr;
// count is recorded before padding, so it excludes the padding entries
numneigh[i] = n;
int pad_end = n;
// NOTE(review): macro defined elsewhere; presumably rounds pad_end up to an
// alignment boundary - confirm. Entries [n, pad_end) are filled with e_nall.
IP_PRE_neighbor_pad(pad_end, 0);
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
avg=INTEL_COMPILE_WIDTH/2
#endif
for ( ; n < pad_end; n++)
neighptr[n] = e_nall;
// n now includes the padding, so the page accounts for the padded length
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
}
list->inum = inum_full;
}
/* ----------------------------------------------------------------------
build half list from full 3-body list
half list is already stored as first part of 3-body list
------------------------------------------------------------------------- */
template <class flt_t>
void NPairHalffullNewtonIntel::build_t3(NeighList *list, int *numhalf)
{
// list    = half list to fill; its parent full list is list->listfull
// numhalf = number of leading entries to copy from each full-list row
// the template parameter flt_t is not used inside this function body
const int inum_full = list->listfull->inum;
// index one past the last local+ghost atom; used as the padding value below
const int e_nall = atom->nlocal + atom->nghost;
int * _noalias const ilist = list->ilist;
int * _noalias const numneigh = list->numneigh;
int ** _noalias const firstneigh = list->firstneigh;
const int * _noalias const ilist_full = list->listfull->ilist;
const int * _noalias const numneigh_full = numhalf;
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
// run the copy single-threaded unless more than INTEL_HTHREADS threads are in use
int packthreads = 1;
if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads;
#if defined(_OPENMP)
#pragma omp parallel if (packthreads > 1)
#endif
{
int tid, ifrom, ito;
// NOTE(review): macro defined elsewhere; presumably assigns this thread its
// id (tid) and its slice [ifrom, ito) of the inum_full entries - confirm
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, packthreads);
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over parent full list
for (int ii = ifrom; ii < ito; ii++) {
int n = 0;
int *neighptr = ipage.vget();
const int i = ilist_full[ii];
// loop over full neighbor list
// NOTE(review): the neighbor pointer is indexed by atom i, but the half
// count is indexed by list position ii - confirm this matches how the
// numhalf array is filled
const int * _noalias const jlist = firstneigh_full[i];
const int jnum = numneigh_full[ii];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
// straight copy of the first jnum entries, without any filtering
for (int jj = 0; jj < jnum; jj++) {
const int joriginal = jlist[jj];
neighptr[n++] = joriginal;
}
ilist[ii] = i;
firstneigh[i] = neighptr;
// count is recorded before padding, so it excludes the padding entries
numneigh[i] = n;
int pad_end = n;
// NOTE(review): macro defined elsewhere; presumably rounds pad_end up to an
// alignment boundary - confirm. Entries [n, pad_end) are filled with e_nall.
IP_PRE_neighbor_pad(pad_end, 0);
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
avg=INTEL_COMPILE_WIDTH/2
#endif
for ( ; n < pad_end; n++)
neighptr[n] = e_nall;
// n now includes the padding, so the page accounts for the padded length
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
}
list->inum = inum_full;
}
/* ---------------------------------------------------------------------- */
void NPairHalffullNewtonIntel::build(NeighList *list)
{
  // Entry point: pick the templated builder that matches the precision mode
  // reported by the Intel fix and whether three-body neighboring is enabled.
  const bool three_body = (_fix->three_body_neighbor() != 0);
  const auto mode = _fix->precision();

  if (!three_body) {
    // regular half-from-full build; template arguments are deduced
    // from the type of the buffer object that is passed in
    switch (mode) {
      case FixIntel::PREC_MODE_MIXED:
        build_t(list, _fix->get_mixed_buffers());
        break;
      case FixIntel::PREC_MODE_DOUBLE:
        build_t(list, _fix->get_double_buffers());
        break;
      default:
        build_t(list, _fix->get_single_buffers());
        break;
    }
    return;
  }

  // three-body build: the half list is already stored as the first part of
  // the 3-body list, so only the half counts (nhalf) are needed;
  // cnum is fetched alongside but not used here
  int *nhalf, *cnum;
  switch (mode) {
    case FixIntel::PREC_MODE_MIXED:
      _fix->get_mixed_buffers()->get_list_data3(list->listfull, nhalf, cnum);
      build_t3<float>(list, nhalf);
      break;
    case FixIntel::PREC_MODE_DOUBLE:
      _fix->get_double_buffers()->get_list_data3(list->listfull, nhalf, cnum);
      build_t3<double>(list, nhalf);
      break;
    default:
      _fix->get_single_buffers()->get_list_data3(list->listfull, nhalf, cnum);
      build_t3<float>(list, nhalf);
      break;
  }
}

View File

@ -1,61 +0,0 @@
// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
// clang-format off
// Each entry lists: style name, class used for that style, mask of NP_* flags.
// Both entries use NPairHalffullNewtonIntel; they differ only in NP_SKIP.
NPairStyle(halffull/newton/intel,
NPairHalffullNewtonIntel,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI| NP_INTEL);
NPairStyle(halffull/newton/skip/intel,
NPairHalffullNewtonIntel,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL);
// clang-format on
#else
#ifndef LMP_NPAIR_HALFFULL_NEWTON_INTEL_H
#define LMP_NPAIR_HALFFULL_NEWTON_INTEL_H
#include "fix_intel.h"
#include "npair.h"
#if defined(_OPENMP)
#include <omp.h>
#endif
namespace LAMMPS_NS {
// NPair subclass that builds a half neighbor list from a parent full list
// (list->listfull) for the Newton-on /intel styles.
class NPairHalffullNewtonIntel : public NPair {
public:
NPairHalffullNewtonIntel(class LAMMPS *);
// Dispatches to build_t() or build_t3() depending on the precision mode and
// three-body setting reported by _fix.
void build(class NeighList *) override;
protected:
// The fix with ID "package_intel"; supplies precision mode and buffers.
FixIntel *_fix;
// Regular build; the buffers argument provides the atom coordinates.
template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
// Three-body build; the int* argument is the array of half-list counts.
template <class flt_t> void build_t3(NeighList *, int *);
};
} // namespace LAMMPS_NS
#endif
#endif

View File

@ -1,61 +0,0 @@
// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (SNL)
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
// clang-format off
// Each entry lists: style name, class used for that style, mask of NP_* flags.
// Both entries use NPairHalffullNewtonTrimIntel and carry NP_TRIM; they differ
// only in NP_SKIP.
NPairStyle(halffull/newton/trim/intel,
NPairHalffullNewtonTrimIntel,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI| NP_TRIM | NP_INTEL);
NPairStyle(halffull/newton/skip/trim/intel,
NPairHalffullNewtonTrimIntel,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
// clang-format on
#else
#ifndef LMP_NPAIR_HALFFULL_NEWTON_TRIM_INTEL_H
#define LMP_NPAIR_HALFFULL_NEWTON_TRIM_INTEL_H
#include "fix_intel.h"
#include "npair.h"
#if defined(_OPENMP)
#include <omp.h>
#endif
namespace LAMMPS_NS {
// NPair subclass registered for the halffull/newton/.../trim/intel styles.
// NOTE(review): the method bodies are not part of this header - see the
// matching .cpp file for what build_t / build_t3 do.
class NPairHalffullNewtonTrimIntel : public NPair {
public:
NPairHalffullNewtonTrimIntel(class LAMMPS *);
void build(class NeighList *) override;
protected:
// Pointer to the Intel package fix.
FixIntel *_fix;
template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
// Takes the list, an int array, and the precision-specific buffers.
template <class flt_t, class acc_t> void build_t3(NeighList *, int *, IntelBuffers<flt_t, acc_t> *);
};
} // namespace LAMMPS_NS
#endif
#endif

View File

@ -0,0 +1,70 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "nstencil_bin_intel.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Constructor: forwards lmp to the NStencil base class; no other setup is done here.
template<int HALF, int DIM_3D, int TRI>
NStencilBinIntel<HALF, DIM_3D, TRI>::NStencilBinIntel(LAMMPS *lmp) : NStencil(lmp) {}
/* ----------------------------------------------------------------------
create stencil based on bin geometry and cutoff
------------------------------------------------------------------------- */
template<int HALF, int DIM_3D, int TRI>
void NStencilBinIntel<HALF, DIM_3D, TRI>::create()
{
  // Half stencils only sweep the upper half of the outermost dimension:
  // start at k = 0 in 3d or at j = 0 in 2d, otherwise cover the full range.
  const int kfirst = (HALF && DIM_3D) ? 0 : -sz;
  const int jfirst = (HALF && (!DIM_3D)) ? 0 : -sy;

  // Half + orthogonal stencils are additionally limited to "upper right" bins.
  const bool upper_right_only = HALF && (!TRI);

  // The central bin (offset 0) is intentionally NOT added up front for half
  // and ortho stencils. Historically it was never part of a stencil; the
  // non-Intel npair classes were updated for that change, but the Intel
  // npair classes have not yet been updated.
  nstencil = 0;

  for (int k = kfirst; k <= sz; ++k) {
    for (int j = jfirst; j <= sy; ++j) {
      for (int i = -sx; i <= sx; ++i) {

        if (upper_right_only) {
          const bool right_of_center = (j > 0) || (j == 0 && i > 0);
          const bool keep = DIM_3D ? (k > 0 || right_of_center) : right_of_center;
          if (!keep) continue;
        }

        // store the flattened bin offset of every bin within the cutoff
        if (bin_distance(i,j,k) < cutneighmaxsq)
          stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
      }
    }
  }
}
// Explicit instantiations of NStencilBinIntel<HALF, DIM_3D, TRI>.
// The member functions are defined in this source file, so each parameter
// combination that is used must be instantiated here. Only TRI = 0 is
// instantiated for full stencils (HALF = 0).
namespace LAMMPS_NS {
template class NStencilBinIntel<0,0,0>;
template class NStencilBinIntel<0,1,0>;
template class NStencilBinIntel<1,0,0>;
template class NStencilBinIntel<1,0,1>;
template class NStencilBinIntel<1,1,0>;
template class NStencilBinIntel<1,1,1>;
}

View File

@ -0,0 +1,65 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NSTENCIL_CLASS
// clang-format off
// Each typedef names one NStencilBinIntel<HALF, DIM_3D, TRI> instantiation and
// the NStencilStyle entry that follows lists: style name, class, NS_* flag mask.

// Full stencils: one class per dimension; the mask carries both NS_ORTHO and NS_TRI.
typedef NStencilBinIntel<0, 0, 0> NStencilFullBin2dIntel;
NStencilStyle(full/bin/2d/intel,
NStencilFullBin2dIntel,
NS_FULL | NS_BIN | NS_2D | NS_ORTHO | NS_TRI | NS_INTEL);
typedef NStencilBinIntel<0, 1, 0> NStencilFullBin3dIntel;
NStencilStyle(full/bin/3d/intel,
NStencilFullBin3dIntel,
NS_FULL | NS_BIN | NS_3D | NS_ORTHO | NS_TRI | NS_INTEL);
// Half stencils: separate entries for NS_ORTHO (TRI = 0) and NS_TRI (TRI = 1).
typedef NStencilBinIntel<1, 0, 0> NStencilHalfBin2dIntel;
NStencilStyle(half/bin/2d/intel,
NStencilHalfBin2dIntel,
NS_HALF | NS_BIN | NS_2D | NS_ORTHO | NS_INTEL);
typedef NStencilBinIntel<1, 0, 1> NStencilHalfBin2dTriIntel;
NStencilStyle(half/bin/2d/tri/intel,
NStencilHalfBin2dTriIntel,
NS_HALF | NS_BIN | NS_2D | NS_TRI | NS_INTEL);
typedef NStencilBinIntel<1, 1, 0> NStencilHalfBin3dIntel;
NStencilStyle(half/bin/3d/intel,
NStencilHalfBin3dIntel,
NS_HALF | NS_BIN | NS_3D | NS_ORTHO | NS_INTEL);
typedef NStencilBinIntel<1, 1, 1> NStencilHalfBin3dTriIntel;
NStencilStyle(half/bin/3d/tri/intel,
NStencilHalfBin3dTriIntel,
NS_HALF | NS_BIN | NS_3D | NS_TRI | NS_INTEL);
// clang-format on
#else
#ifndef LMP_NSTENCIL_BIN_INTEL_H
#define LMP_NSTENCIL_BIN_INTEL_H
#include "nstencil.h"
namespace LAMMPS_NS {
// Binned stencil for the /intel styles, parameterized at compile time:
//   HALF   - nonzero for a half stencil, zero for a full stencil
//   DIM_3D - nonzero for 3d, zero for 2d
//   TRI    - nonzero for the tri (NS_TRI) variants of the half stencils
template<int HALF, int DIM_3D, int TRI>
class NStencilBinIntel : public NStencil {
public:
NStencilBinIntel(class LAMMPS *);
// Fills the stencil array from the bin geometry and cutoff.
void create() override;
};
} // namespace LAMMPS_NS
#endif
#endif

View File

@ -1991,6 +1991,7 @@ int Neighbor::choose_stencil(NeighRequest *rq)
// require match of these request flags and mask bits
// (!A != !B) is effectively a logical xor
if (!rq->intel != !(mask & NS_INTEL)) continue;
if (!rq->ghost != !(mask & NS_GHOST)) continue;
if (!rq->ssa != !(mask & NS_SSA)) continue;

View File

@ -302,8 +302,9 @@ namespace NeighConst {
NS_ORTHO = 1 << 6,
NS_TRI = 1 << 7,
NS_GHOST = 1 << 8,
NS_SSA = 1 << 9,
NS_MULTI_OLD = 1 << 10
NS_INTEL = 1 << 9,
NS_SSA = 1 << 10,
NS_MULTI_OLD = 1 << 11
};
enum {