Reorganizing intel npair to work with cmake, adding intel stencil
This commit is contained in:
@ -16,7 +16,7 @@
|
||||
Contributing authors: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_full_bin_ghost_intel.h"
|
||||
#include "npair_bin_ghost_intel.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
@ -25,8 +25,8 @@ NPairStyle(full/bin/ghost/intel,
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_FULL_BIN_GHOST_INTEL_H
|
||||
#define LMP_NPAIR_FULL_BIN_GHOST_INTEL_H
|
||||
#ifndef LMP_NPAIR_BIN_GHOST_INTEL_H
|
||||
#define LMP_NPAIR_BIN_GHOST_INTEL_H
|
||||
|
||||
#include "npair_intel.h"
|
||||
|
||||
298
src/INTEL/npair_bin_intel.cpp
Normal file
298
src/INTEL/npair_bin_intel.cpp
Normal file
@ -0,0 +1,298 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_bin_intel.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neighbor.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairHalfBinNewtonIntel::NPairHalfBinNewtonIntel(LAMMPS *lmp) :
|
||||
NPairIntel(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with full Newton's 3rd law
|
||||
each owned atom i checks its own bin and other bins in Newton stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairHalfBinNewtonIntel::build(NeighList *list)
|
||||
{
|
||||
if (nstencil / 2 > INTEL_MAX_STENCIL_CHECK)
|
||||
error->all(FLERR, "Too many neighbor bins for INTEL package.");
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (exclude)
|
||||
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
|
||||
#endif
|
||||
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
hbni(list, _fix->get_mixed_buffers());
|
||||
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
hbni(list, _fix->get_double_buffers());
|
||||
else
|
||||
hbni(list, _fix->get_single_buffers());
|
||||
|
||||
_fix->stop_watch(TIME_HOST_NEIGHBOR);
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairHalfBinNewtonIntel::
|
||||
hbni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
list->inum = nlocal;
|
||||
|
||||
int host_start = _fix->host_start_neighbor();
|
||||
const int off_end = _fix->offload_end_neighbor();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (off_end) grow_stencil();
|
||||
if (_fix->full_host_list()) host_start = 0;
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular != Atom::ATOMIC)
|
||||
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
|
||||
neighbor->cutneighmax);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (need_ic) {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,1,0,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,1,0,0,0>(0, list, buffers, host_start, nlocal,
|
||||
off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,1,0,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
} else {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,0,0,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,0,0,0,0>(0, list, buffers, host_start, nlocal,
|
||||
off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,0,0,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (need_ic)
|
||||
bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal);
|
||||
else
|
||||
bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairHalfBinNewtonTriIntel::NPairHalfBinNewtonTriIntel(LAMMPS *lmp) :
|
||||
NPairIntel(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with Newton's 3rd law for triclinic
|
||||
each owned atom i checks its own bin and other bins in triclinic stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairHalfBinNewtonTriIntel::build(NeighList *list)
|
||||
{
|
||||
if (nstencil > INTEL_MAX_STENCIL)
|
||||
error->all(FLERR, "Too many neighbor bins for INTEL package.");
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (exclude)
|
||||
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
|
||||
#endif
|
||||
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
hbnti(list, _fix->get_mixed_buffers());
|
||||
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
hbnti(list, _fix->get_double_buffers());
|
||||
else
|
||||
hbnti(list, _fix->get_single_buffers());
|
||||
|
||||
_fix->stop_watch(TIME_HOST_NEIGHBOR);
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairHalfBinNewtonTriIntel::
|
||||
hbnti(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
list->inum = nlocal;
|
||||
|
||||
int host_start = _fix->host_start_neighbor();
|
||||
const int off_end = _fix->offload_end_neighbor();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (off_end) grow_stencil();
|
||||
if (_fix->full_host_list()) host_start = 0;
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular != Atom::ATOMIC)
|
||||
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
|
||||
neighbor->cutneighmax);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (need_ic) {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,1,0,1,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,1,0,1,0>(0, list, buffers, host_start, nlocal,
|
||||
off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,1,0,1,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
} else {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,0,0,1,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,0,0,1,0>(0, list, buffers, host_start, nlocal,
|
||||
off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,0,0,1,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (need_ic)
|
||||
bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal);
|
||||
else
|
||||
bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairFullBinIntel::NPairFullBinIntel(LAMMPS *lmp) : NPairIntel(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction for all neighbors
|
||||
every neighbor pair appears in list of both atoms i and j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairFullBinIntel::build(NeighList *list)
|
||||
{
|
||||
if (nstencil > INTEL_MAX_STENCIL_CHECK)
|
||||
error->all(FLERR, "Too many neighbor bins for INTEL package.");
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (exclude)
|
||||
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
|
||||
#endif
|
||||
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
fbi(list, _fix->get_mixed_buffers());
|
||||
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
fbi(list, _fix->get_double_buffers());
|
||||
else
|
||||
fbi(list, _fix->get_single_buffers());
|
||||
|
||||
_fix->stop_watch(TIME_HOST_NEIGHBOR);
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairFullBinIntel::
|
||||
fbi(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
list->inum = nlocal;
|
||||
list->gnum = 0;
|
||||
|
||||
int host_start = _fix->host_start_neighbor();;
|
||||
const int off_end = _fix->offload_end_neighbor();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (off_end) grow_stencil();
|
||||
if (_fix->full_host_list()) host_start = 0;
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads,
|
||||
_fix->three_body_neighbor(), off_end,
|
||||
_fix->nbor_pack_width());
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular != Atom::ATOMIC)
|
||||
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
|
||||
neighbor->cutneighmax);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_fix->three_body_neighbor()) {
|
||||
if (need_ic) {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,1,1,0,1>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,1,1,0,1>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,1,1,0,1>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
} else {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,0,1,0,1>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,0,1,0,1>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,0,1,0,1>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (need_ic) {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,1,1,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,1,1,0,0>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,1,1,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
} else {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,0,1,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,0,1,0,0>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,0,1,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (_fix->three_body_neighbor()) {
|
||||
if (need_ic)
|
||||
bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal);
|
||||
else
|
||||
bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal);
|
||||
} else {
|
||||
if (need_ic)
|
||||
bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal);
|
||||
else
|
||||
bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -14,20 +14,38 @@
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
NPairStyle(half/bin/newton/intel,
|
||||
NPairHalfBinNewtonIntel,
|
||||
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_INTEL);
|
||||
|
||||
NPairStyle(half/bin/newton/tri/intel,
|
||||
NPairHalfBinNewtonTriIntel,
|
||||
NP_HALF | NP_BIN | NP_NEWTON | NP_TRI | NP_INTEL);
|
||||
|
||||
NPairStyle(full/bin/intel,
|
||||
NPairFullBinIntel,
|
||||
NP_FULL | NP_BIN | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI |
|
||||
NP_INTEL);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALF_BIN_NEWTON_INTEL_TRI_H
|
||||
#define LMP_NPAIR_HALF_BIN_NEWTON_INTEL_TRI_H
|
||||
#ifndef LMP_NPAIR_BIN_INTEL_H
|
||||
#define LMP_NPAIR_BIN_INTEL_H
|
||||
|
||||
#include "fix_intel.h"
|
||||
#include "npair_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairHalfBinNewtonIntel : public NPairIntel {
|
||||
public:
|
||||
NPairHalfBinNewtonIntel(class LAMMPS *);
|
||||
void build(class NeighList *) override;
|
||||
|
||||
private:
|
||||
template <class flt_t, class acc_t> void hbni(NeighList *, IntelBuffers<flt_t, acc_t> *);
|
||||
};
|
||||
|
||||
class NPairHalfBinNewtonTriIntel : public NPairIntel {
|
||||
public:
|
||||
NPairHalfBinNewtonTriIntel(class LAMMPS *);
|
||||
@ -37,6 +55,15 @@ class NPairHalfBinNewtonTriIntel : public NPairIntel {
|
||||
template <class flt_t, class acc_t> void hbnti(NeighList *, IntelBuffers<flt_t, acc_t> *);
|
||||
};
|
||||
|
||||
class NPairFullBinIntel : public NPairIntel {
|
||||
public:
|
||||
NPairFullBinIntel(class LAMMPS *);
|
||||
void build(class NeighList *) override;
|
||||
|
||||
private:
|
||||
template <class flt_t, class acc_t> void fbi(NeighList *, IntelBuffers<flt_t, acc_t> *);
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
@ -1,134 +0,0 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_full_bin_intel.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neighbor.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairFullBinIntel::NPairFullBinIntel(LAMMPS *lmp) : NPairIntel(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction for all neighbors
|
||||
every neighbor pair appears in list of both atoms i and j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairFullBinIntel::build(NeighList *list)
|
||||
{
|
||||
if (nstencil > INTEL_MAX_STENCIL_CHECK)
|
||||
error->all(FLERR, "Too many neighbor bins for INTEL package.");
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (exclude)
|
||||
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
|
||||
#endif
|
||||
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
fbi(list, _fix->get_mixed_buffers());
|
||||
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
fbi(list, _fix->get_double_buffers());
|
||||
else
|
||||
fbi(list, _fix->get_single_buffers());
|
||||
|
||||
_fix->stop_watch(TIME_HOST_NEIGHBOR);
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairFullBinIntel::
|
||||
fbi(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
list->inum = nlocal;
|
||||
list->gnum = 0;
|
||||
|
||||
int host_start = _fix->host_start_neighbor();;
|
||||
const int off_end = _fix->offload_end_neighbor();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (off_end) grow_stencil();
|
||||
if (_fix->full_host_list()) host_start = 0;
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads,
|
||||
_fix->three_body_neighbor(), off_end,
|
||||
_fix->nbor_pack_width());
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular != Atom::ATOMIC)
|
||||
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
|
||||
neighbor->cutneighmax);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_fix->three_body_neighbor()) {
|
||||
if (need_ic) {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,1,1,0,1>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,1,1,0,1>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,1,1,0,1>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
} else {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,0,1,0,1>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,0,1,0,1>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,0,1,0,1>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (need_ic) {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,1,1,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,1,1,0,0>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,1,1,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
} else {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,0,1,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,0,1,0,0>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,0,1,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (_fix->three_body_neighbor()) {
|
||||
if (need_ic)
|
||||
bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal);
|
||||
else
|
||||
bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal);
|
||||
} else {
|
||||
if (need_ic)
|
||||
bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal);
|
||||
else
|
||||
bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -1,44 +0,0 @@
|
||||
// clang-format off
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
NPairStyle(full/bin/intel,
|
||||
NPairFullBinIntel,
|
||||
NP_FULL | NP_BIN | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI |
|
||||
NP_INTEL);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_FULL_BIN_INTEL_H
|
||||
#define LMP_NPAIR_FULL_BIN_INTEL_H
|
||||
|
||||
#include "fix_intel.h"
|
||||
#include "npair_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairFullBinIntel : public NPairIntel {
|
||||
public:
|
||||
NPairFullBinIntel(class LAMMPS *);
|
||||
void build(class NeighList *) override;
|
||||
|
||||
private:
|
||||
template <class flt_t, class acc_t> void fbi(NeighList *, IntelBuffers<flt_t, acc_t> *);
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@ -1,108 +0,0 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_half_bin_newton_intel.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neighbor.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairHalfBinNewtonIntel::NPairHalfBinNewtonIntel(LAMMPS *lmp) :
|
||||
NPairIntel(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with full Newton's 3rd law
|
||||
each owned atom i checks its own bin and other bins in Newton stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairHalfBinNewtonIntel::build(NeighList *list)
|
||||
{
|
||||
if (nstencil / 2 > INTEL_MAX_STENCIL_CHECK)
|
||||
error->all(FLERR, "Too many neighbor bins for INTEL package.");
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (exclude)
|
||||
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
|
||||
#endif
|
||||
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
hbni(list, _fix->get_mixed_buffers());
|
||||
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
hbni(list, _fix->get_double_buffers());
|
||||
else
|
||||
hbni(list, _fix->get_single_buffers());
|
||||
|
||||
_fix->stop_watch(TIME_HOST_NEIGHBOR);
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairHalfBinNewtonIntel::
|
||||
hbni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
list->inum = nlocal;
|
||||
|
||||
int host_start = _fix->host_start_neighbor();
|
||||
const int off_end = _fix->offload_end_neighbor();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (off_end) grow_stencil();
|
||||
if (_fix->full_host_list()) host_start = 0;
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular != Atom::ATOMIC)
|
||||
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
|
||||
neighbor->cutneighmax);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (need_ic) {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,1,0,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,1,0,0,0>(0, list, buffers, host_start, nlocal,
|
||||
off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,1,0,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
} else {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,0,0,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,0,0,0,0>(0, list, buffers, host_start, nlocal,
|
||||
off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,0,0,0,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (need_ic)
|
||||
bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal);
|
||||
else
|
||||
bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal);
|
||||
#endif
|
||||
}
|
||||
@ -1,43 +0,0 @@
|
||||
// clang-format off
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
NPairStyle(half/bin/newton/intel,
|
||||
NPairHalfBinNewtonIntel,
|
||||
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_INTEL);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALF_BIN_NEWTON_INTEL_H
|
||||
#define LMP_NPAIR_HALF_BIN_NEWTON_INTEL_H
|
||||
|
||||
#include "fix_intel.h"
|
||||
#include "npair_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairHalfBinNewtonIntel : public NPairIntel {
|
||||
public:
|
||||
NPairHalfBinNewtonIntel(class LAMMPS *);
|
||||
void build(class NeighList *) override;
|
||||
|
||||
private:
|
||||
template <class flt_t, class acc_t> void hbni(NeighList *, IntelBuffers<flt_t, acc_t> *);
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@ -1,108 +0,0 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_half_bin_newton_tri_intel.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neighbor.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairHalfBinNewtonTriIntel::NPairHalfBinNewtonTriIntel(LAMMPS *lmp) :
|
||||
NPairIntel(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with Newton's 3rd law for triclinic
|
||||
each owned atom i checks its own bin and other bins in triclinic stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairHalfBinNewtonTriIntel::build(NeighList *list)
|
||||
{
|
||||
if (nstencil > INTEL_MAX_STENCIL)
|
||||
error->all(FLERR, "Too many neighbor bins for INTEL package.");
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (exclude)
|
||||
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
|
||||
#endif
|
||||
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
hbnti(list, _fix->get_mixed_buffers());
|
||||
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
hbnti(list, _fix->get_double_buffers());
|
||||
else
|
||||
hbnti(list, _fix->get_single_buffers());
|
||||
|
||||
_fix->stop_watch(TIME_HOST_NEIGHBOR);
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairHalfBinNewtonTriIntel::
|
||||
hbnti(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
list->inum = nlocal;
|
||||
|
||||
int host_start = _fix->host_start_neighbor();
|
||||
const int off_end = _fix->offload_end_neighbor();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (off_end) grow_stencil();
|
||||
if (_fix->full_host_list()) host_start = 0;
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular != Atom::ATOMIC)
|
||||
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
|
||||
neighbor->cutneighmax);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (need_ic) {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,1,0,1,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,1,0,1,0>(0, list, buffers, host_start, nlocal,
|
||||
off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,1,0,1,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
} else {
|
||||
if (offload_noghost) {
|
||||
bin_newton<flt_t,acc_t,1,0,0,1,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,1,0,0,1,0>(0, list, buffers, host_start, nlocal,
|
||||
off_end);
|
||||
} else {
|
||||
bin_newton<flt_t,acc_t,0,0,0,1,0>(1, list, buffers, 0, off_end);
|
||||
bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (need_ic)
|
||||
bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal);
|
||||
else
|
||||
bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal);
|
||||
#endif
|
||||
}
|
||||
@ -13,10 +13,10 @@
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (SNL)
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_halffull_newton_trim_intel.h"
|
||||
#include "npair_halffull_intel.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
@ -29,6 +29,204 @@ using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairHalffullNewtonIntel::NPairHalffullNewtonIntel(LAMMPS *lmp) : NPair(lmp) {
|
||||
_fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
|
||||
if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
build half list from full list
|
||||
pair stored once if i,j are both owned and i < j
|
||||
if j is ghost, only store if j coords are "above and to the right" of i
|
||||
works if full list is a skip list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairHalffullNewtonIntel::build_t(NeighList *list,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
const int inum_full = list->listfull->inum;
|
||||
const int nlocal = atom->nlocal;
|
||||
const int e_nall = nlocal + atom->nghost;
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias const numneigh = list->numneigh;
|
||||
int ** _noalias const firstneigh = list->firstneigh;
|
||||
const int * _noalias const ilist_full = list->listfull->ilist;
|
||||
const int * _noalias const numneigh_full = list->listfull->numneigh;
|
||||
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, comm->nthreads);
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over parent full list
|
||||
for (int ii = ifrom; ii < ito; ii++) {
|
||||
int n = 0;
|
||||
int *neighptr = ipage.vget();
|
||||
|
||||
const int i = ilist_full[ii];
|
||||
const flt_t xtmp = x[i].x;
|
||||
const flt_t ytmp = x[i].y;
|
||||
const flt_t ztmp = x[i].z;
|
||||
|
||||
// loop over full neighbor list
|
||||
|
||||
const int * _noalias const jlist = firstneigh_full[i];
|
||||
const int jnum = numneigh_full[i];
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int joriginal = jlist[jj];
|
||||
const int j = joriginal & NEIGHMASK;
|
||||
int addme = 1;
|
||||
if (j < nlocal) {
|
||||
if (i > j) addme = 0;
|
||||
} else {
|
||||
if (x[j].z < ztmp) addme = 0;
|
||||
if (x[j].z == ztmp) {
|
||||
if (x[j].y < ytmp) addme = 0;
|
||||
if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
|
||||
}
|
||||
}
|
||||
if (addme)
|
||||
neighptr[n++] = joriginal;
|
||||
}
|
||||
|
||||
ilist[ii] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
|
||||
int pad_end = n;
|
||||
IP_PRE_neighbor_pad(pad_end, 0);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
|
||||
avg=INTEL_COMPILE_WIDTH/2
|
||||
#endif
|
||||
for ( ; n < pad_end; n++)
|
||||
neighptr[n] = e_nall;
|
||||
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
}
|
||||
list->inum = inum_full;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
build half list from full 3-body list
|
||||
half list is already stored as first part of 3-body list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t>
|
||||
void NPairHalffullNewtonIntel::build_t3(NeighList *list, int *numhalf)
|
||||
{
|
||||
const int inum_full = list->listfull->inum;
|
||||
const int e_nall = atom->nlocal + atom->nghost;
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias const numneigh = list->numneigh;
|
||||
int ** _noalias const firstneigh = list->firstneigh;
|
||||
const int * _noalias const ilist_full = list->listfull->ilist;
|
||||
const int * _noalias const numneigh_full = numhalf;
|
||||
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
|
||||
|
||||
int packthreads = 1;
|
||||
if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel if (packthreads > 1)
|
||||
#endif
|
||||
{
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, packthreads);
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over parent full list
|
||||
for (int ii = ifrom; ii < ito; ii++) {
|
||||
int n = 0;
|
||||
int *neighptr = ipage.vget();
|
||||
|
||||
const int i = ilist_full[ii];
|
||||
|
||||
// loop over full neighbor list
|
||||
|
||||
const int * _noalias const jlist = firstneigh_full[i];
|
||||
const int jnum = numneigh_full[ii];
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int joriginal = jlist[jj];
|
||||
neighptr[n++] = joriginal;
|
||||
}
|
||||
|
||||
ilist[ii] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
|
||||
int pad_end = n;
|
||||
IP_PRE_neighbor_pad(pad_end, 0);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
|
||||
avg=INTEL_COMPILE_WIDTH/2
|
||||
#endif
|
||||
for ( ; n < pad_end; n++)
|
||||
neighptr[n] = e_nall;
|
||||
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
}
|
||||
list->inum = inum_full;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NPairHalffullNewtonIntel::build(NeighList *list)
|
||||
{
|
||||
if (_fix->three_body_neighbor() == 0) {
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
build_t(list, _fix->get_mixed_buffers());
|
||||
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
build_t(list, _fix->get_double_buffers());
|
||||
else
|
||||
build_t(list, _fix->get_single_buffers());
|
||||
} else {
|
||||
int *nhalf, *cnum;
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED) {
|
||||
_fix->get_mixed_buffers()->get_list_data3(list->listfull, nhalf, cnum);
|
||||
build_t3<float>(list, nhalf);
|
||||
} else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
|
||||
_fix->get_double_buffers()->get_list_data3(list->listfull, nhalf, cnum);
|
||||
build_t3<double>(list, nhalf);
|
||||
} else {
|
||||
_fix->get_single_buffers()->get_list_data3(list->listfull, nhalf, cnum);
|
||||
build_t3<float>(list, nhalf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairHalffullNewtonTrimIntel::NPairHalffullNewtonTrimIntel(LAMMPS *lmp) : NPair(lmp) {
|
||||
_fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
|
||||
if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
|
||||
128
src/INTEL/npair_halffull_intel.h
Normal file
128
src/INTEL/npair_halffull_intel.h
Normal file
@ -0,0 +1,128 @@
|
||||
// clang-format off
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// For Newton off, only used for hybrid to generate list for non-intel style.
|
||||
// Use standard routines.
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
NPairStyle(halffull/newton/intel,
|
||||
NPairHalffullNewtonIntel,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI| NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newton/skip/intel,
|
||||
NPairHalffullNewtonIntel,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/intel,
|
||||
NPairHalffullNewtoff,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/skip/intel,
|
||||
NPairHalffullNewtoff,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/ghost/intel,
|
||||
NPairHalffullNewtoff,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/skip/ghost/intel,
|
||||
NPairHalffullNewtoff,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_INTEL);
|
||||
|
||||
|
||||
NPairStyle(halffull/newton/trim/intel,
|
||||
NPairHalffullNewtonTrimIntel,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI| NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newton/skip/trim/intel,
|
||||
NPairHalffullNewtonTrimIntel,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/trim/intel,
|
||||
NPairHalffullNewtoffTrim,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/skip/trim/intel,
|
||||
NPairHalffullNewtoffTrim,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/ghost/trim/intel,
|
||||
NPairHalffullNewtoffTrim,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/skip/ghost/trim/intel,
|
||||
NPairHalffullNewtoffTrim,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_TRIM | NP_INTEL);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALFFULL_INTEL_H
|
||||
#define LMP_NPAIR_HALFFULL_INTEL_H
|
||||
|
||||
#include "fix_intel.h"
|
||||
#include "npair.h"
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairHalffullNewtonIntel : public NPair {
|
||||
public:
|
||||
NPairHalffullNewtonIntel(class LAMMPS *);
|
||||
void build(class NeighList *) override;
|
||||
|
||||
protected:
|
||||
FixIntel *_fix;
|
||||
|
||||
template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
|
||||
|
||||
template <class flt_t> void build_t3(NeighList *, int *);
|
||||
};
|
||||
|
||||
class NPairHalffullNewtonTrimIntel : public NPair {
|
||||
public:
|
||||
NPairHalffullNewtonTrimIntel(class LAMMPS *);
|
||||
void build(class NeighList *) override;
|
||||
|
||||
protected:
|
||||
FixIntel *_fix;
|
||||
|
||||
template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
|
||||
|
||||
template <class flt_t, class acc_t> void build_t3(NeighList *, int *, IntelBuffers<flt_t, acc_t> *);
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@ -1,44 +0,0 @@
|
||||
// clang-format off
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// Only used for hybrid to generate list for non-intel style. Use
|
||||
// standard routines.
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
NPairStyle(halffull/newtoff/intel,
|
||||
NPairHalffullNewtoff,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/skip/intel,
|
||||
NPairHalffullNewtoff,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/ghost/intel,
|
||||
NPairHalffullNewtoff,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/skip/ghost/intel,
|
||||
NPairHalffullNewtoff,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_INTEL);
|
||||
// clang-format on
|
||||
#endif
|
||||
@ -1,44 +0,0 @@
|
||||
// clang-format off
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// Only used for hybrid to generate list for non-intel style. Use
|
||||
// standard routines.
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
NPairStyle(halffull/newtoff/trim/intel,
|
||||
NPairHalffullNewtoffTrim,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/skip/trim/intel,
|
||||
NPairHalffullNewtoffTrim,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/ghost/trim/intel,
|
||||
NPairHalffullNewtoffTrim,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/skip/ghost/trim/intel,
|
||||
NPairHalffullNewtoffTrim,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_TRIM | NP_INTEL);
|
||||
// clang-format on
|
||||
#endif
|
||||
@ -1,226 +0,0 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_halffull_newton_intel.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "modify.h"
|
||||
#include "my_page.h"
|
||||
#include "neigh_list.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairHalffullNewtonIntel::NPairHalffullNewtonIntel(LAMMPS *lmp) : NPair(lmp) {
|
||||
_fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
|
||||
if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
build half list from full list
|
||||
pair stored once if i,j are both owned and i < j
|
||||
if j is ghost, only store if j coords are "above and to the right" of i
|
||||
works if full list is a skip list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairHalffullNewtonIntel::build_t(NeighList *list,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
const int inum_full = list->listfull->inum;
|
||||
const int nlocal = atom->nlocal;
|
||||
const int e_nall = nlocal + atom->nghost;
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias const numneigh = list->numneigh;
|
||||
int ** _noalias const firstneigh = list->firstneigh;
|
||||
const int * _noalias const ilist_full = list->listfull->ilist;
|
||||
const int * _noalias const numneigh_full = list->listfull->numneigh;
|
||||
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, comm->nthreads);
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over parent full list
|
||||
for (int ii = ifrom; ii < ito; ii++) {
|
||||
int n = 0;
|
||||
int *neighptr = ipage.vget();
|
||||
|
||||
const int i = ilist_full[ii];
|
||||
const flt_t xtmp = x[i].x;
|
||||
const flt_t ytmp = x[i].y;
|
||||
const flt_t ztmp = x[i].z;
|
||||
|
||||
// loop over full neighbor list
|
||||
|
||||
const int * _noalias const jlist = firstneigh_full[i];
|
||||
const int jnum = numneigh_full[i];
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int joriginal = jlist[jj];
|
||||
const int j = joriginal & NEIGHMASK;
|
||||
int addme = 1;
|
||||
if (j < nlocal) {
|
||||
if (i > j) addme = 0;
|
||||
} else {
|
||||
if (x[j].z < ztmp) addme = 0;
|
||||
if (x[j].z == ztmp) {
|
||||
if (x[j].y < ytmp) addme = 0;
|
||||
if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
|
||||
}
|
||||
}
|
||||
if (addme)
|
||||
neighptr[n++] = joriginal;
|
||||
}
|
||||
|
||||
ilist[ii] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
|
||||
int pad_end = n;
|
||||
IP_PRE_neighbor_pad(pad_end, 0);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
|
||||
avg=INTEL_COMPILE_WIDTH/2
|
||||
#endif
|
||||
for ( ; n < pad_end; n++)
|
||||
neighptr[n] = e_nall;
|
||||
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
}
|
||||
list->inum = inum_full;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
build half list from full 3-body list
|
||||
half list is already stored as first part of 3-body list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t>
|
||||
void NPairHalffullNewtonIntel::build_t3(NeighList *list, int *numhalf)
|
||||
{
|
||||
const int inum_full = list->listfull->inum;
|
||||
const int e_nall = atom->nlocal + atom->nghost;
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias const numneigh = list->numneigh;
|
||||
int ** _noalias const firstneigh = list->firstneigh;
|
||||
const int * _noalias const ilist_full = list->listfull->ilist;
|
||||
const int * _noalias const numneigh_full = numhalf;
|
||||
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
|
||||
|
||||
int packthreads = 1;
|
||||
if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel if (packthreads > 1)
|
||||
#endif
|
||||
{
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, packthreads);
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over parent full list
|
||||
for (int ii = ifrom; ii < ito; ii++) {
|
||||
int n = 0;
|
||||
int *neighptr = ipage.vget();
|
||||
|
||||
const int i = ilist_full[ii];
|
||||
|
||||
// loop over full neighbor list
|
||||
|
||||
const int * _noalias const jlist = firstneigh_full[i];
|
||||
const int jnum = numneigh_full[ii];
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int joriginal = jlist[jj];
|
||||
neighptr[n++] = joriginal;
|
||||
}
|
||||
|
||||
ilist[ii] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
|
||||
int pad_end = n;
|
||||
IP_PRE_neighbor_pad(pad_end, 0);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
|
||||
avg=INTEL_COMPILE_WIDTH/2
|
||||
#endif
|
||||
for ( ; n < pad_end; n++)
|
||||
neighptr[n] = e_nall;
|
||||
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
}
|
||||
list->inum = inum_full;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NPairHalffullNewtonIntel::build(NeighList *list)
|
||||
{
|
||||
if (_fix->three_body_neighbor() == 0) {
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
build_t(list, _fix->get_mixed_buffers());
|
||||
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
build_t(list, _fix->get_double_buffers());
|
||||
else
|
||||
build_t(list, _fix->get_single_buffers());
|
||||
} else {
|
||||
int *nhalf, *cnum;
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED) {
|
||||
_fix->get_mixed_buffers()->get_list_data3(list->listfull, nhalf, cnum);
|
||||
build_t3<float>(list, nhalf);
|
||||
} else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
|
||||
_fix->get_double_buffers()->get_list_data3(list->listfull, nhalf, cnum);
|
||||
build_t3<double>(list, nhalf);
|
||||
} else {
|
||||
_fix->get_single_buffers()->get_list_data3(list->listfull, nhalf, cnum);
|
||||
build_t3<float>(list, nhalf);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1,61 +0,0 @@
|
||||
// clang-format off
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
NPairStyle(halffull/newton/intel,
|
||||
NPairHalffullNewtonIntel,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI| NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newton/skip/intel,
|
||||
NPairHalffullNewtonIntel,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALFFULL_NEWTON_INTEL_H
|
||||
#define LMP_NPAIR_HALFFULL_NEWTON_INTEL_H
|
||||
|
||||
#include "fix_intel.h"
|
||||
#include "npair.h"
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairHalffullNewtonIntel : public NPair {
|
||||
public:
|
||||
NPairHalffullNewtonIntel(class LAMMPS *);
|
||||
void build(class NeighList *) override;
|
||||
|
||||
protected:
|
||||
FixIntel *_fix;
|
||||
|
||||
template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
|
||||
|
||||
template <class flt_t> void build_t3(NeighList *, int *);
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@ -1,61 +0,0 @@
|
||||
// clang-format off
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
NPairStyle(halffull/newton/trim/intel,
|
||||
NPairHalffullNewtonTrimIntel,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI| NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newton/skip/trim/intel,
|
||||
NPairHalffullNewtonTrimIntel,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALFFULL_NEWTON_TRIM_INTEL_H
|
||||
#define LMP_NPAIR_HALFFULL_NEWTON_TRIM_INTEL_H
|
||||
|
||||
#include "fix_intel.h"
|
||||
#include "npair.h"
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairHalffullNewtonTrimIntel : public NPair {
|
||||
public:
|
||||
NPairHalffullNewtonTrimIntel(class LAMMPS *);
|
||||
void build(class NeighList *) override;
|
||||
|
||||
protected:
|
||||
FixIntel *_fix;
|
||||
|
||||
template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
|
||||
|
||||
template <class flt_t, class acc_t> void build_t3(NeighList *, int *, IntelBuffers<flt_t, acc_t> *);
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
#endif
|
||||
70
src/INTEL/nstencil_bin_intel.cpp
Normal file
70
src/INTEL/nstencil_bin_intel.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "nstencil_bin_intel.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<int HALF, int DIM_3D, int TRI>
|
||||
NStencilBinIntel<HALF, DIM_3D, TRI>::NStencilBinIntel(LAMMPS *lmp) : NStencil(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create stencil based on bin geometry and cutoff
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<int HALF, int DIM_3D, int TRI>
|
||||
void NStencilBinIntel<HALF, DIM_3D, TRI>::create()
|
||||
{
|
||||
int i, j, k;
|
||||
|
||||
// For half stencils, only the upper plane is needed
|
||||
int sy_min = sy;
|
||||
int sz_min = sz;
|
||||
if (HALF && (!DIM_3D)) sy_min = 0;
|
||||
if (HALF && DIM_3D) sz_min = 0;
|
||||
|
||||
nstencil = 0;
|
||||
|
||||
// For Intel, half and ortho stencils do not include central bin
|
||||
// as, historically, this was never included in a stencil.
|
||||
// Non-Intel npair classes were updated to account for this change,
|
||||
// but the Intel npair classes have not yet been updated
|
||||
// if (HALF && (!TRI)) stencil[nstencil++] = 0;
|
||||
|
||||
for (k = -sz_min; k <= sz; k++) {
|
||||
for (j = -sy_min; j <= sy; j++) {
|
||||
for (i = -sx; i <= sx; i++) {
|
||||
|
||||
// Now only include "upper right" bins for half and ortho stencils
|
||||
if (HALF && (!DIM_3D) && (!TRI))
|
||||
if (! (j > 0 || (j == 0 && i > 0))) continue;
|
||||
if (HALF && DIM_3D && (!TRI))
|
||||
if (! (k > 0 || j > 0 || (j == 0 && i > 0))) continue;
|
||||
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq)
|
||||
stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class NStencilBinIntel<0,0,0>;
|
||||
template class NStencilBinIntel<0,1,0>;
|
||||
template class NStencilBinIntel<1,0,0>;
|
||||
template class NStencilBinIntel<1,0,1>;
|
||||
template class NStencilBinIntel<1,1,0>;
|
||||
template class NStencilBinIntel<1,1,1>;
|
||||
}
|
||||
65
src/INTEL/nstencil_bin_intel.h
Normal file
65
src/INTEL/nstencil_bin_intel.h
Normal file
@ -0,0 +1,65 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
LAMMPS development team: developers@lammps.org
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NSTENCIL_CLASS
|
||||
// clang-format off
|
||||
typedef NStencilBinIntel<0, 0, 0> NStencilFullBin2dIntel;
|
||||
NStencilStyle(full/bin/2d/intel,
|
||||
NStencilFullBin2dIntel,
|
||||
NS_FULL | NS_BIN | NS_2D | NS_ORTHO | NS_TRI | NS_INTEL);
|
||||
|
||||
typedef NStencilBinIntel<0, 1, 0> NStencilFullBin3dIntel;
|
||||
NStencilStyle(full/bin/3d/intel,
|
||||
NStencilFullBin3dIntel,
|
||||
NS_FULL | NS_BIN | NS_3D | NS_ORTHO | NS_TRI | NS_INTEL);
|
||||
|
||||
typedef NStencilBinIntel<1, 0, 0> NStencilHalfBin2dIntel;
|
||||
NStencilStyle(half/bin/2d/intel,
|
||||
NStencilHalfBin2dIntel,
|
||||
NS_HALF | NS_BIN | NS_2D | NS_ORTHO | NS_INTEL);
|
||||
|
||||
typedef NStencilBinIntel<1, 0, 1> NStencilHalfBin2dTriIntel;
|
||||
NStencilStyle(half/bin/2d/tri/intel,
|
||||
NStencilHalfBin2dTriIntel,
|
||||
NS_HALF | NS_BIN | NS_2D | NS_TRI | NS_INTEL);
|
||||
|
||||
typedef NStencilBinIntel<1, 1, 0> NStencilHalfBin3dIntel;
|
||||
NStencilStyle(half/bin/3d/intel,
|
||||
NStencilHalfBin3dIntel,
|
||||
NS_HALF | NS_BIN | NS_3D | NS_ORTHO | NS_INTEL);
|
||||
|
||||
typedef NStencilBinIntel<1, 1, 1> NStencilHalfBin3dTriIntel;
|
||||
NStencilStyle(half/bin/3d/tri/intel,
|
||||
NStencilHalfBin3dTriIntel,
|
||||
NS_HALF | NS_BIN | NS_3D | NS_TRI | NS_INTEL);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_NSTENCIL_BIN_INTEL_H
|
||||
#define LMP_NSTENCIL_BIN_INTEL_H
|
||||
|
||||
#include "nstencil.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<int HALF, int DIM_3D, int TRI>
|
||||
class NStencilBinIntel : public NStencil {
|
||||
public:
|
||||
NStencilBinIntel(class LAMMPS *);
|
||||
void create() override;
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@ -1991,6 +1991,7 @@ int Neighbor::choose_stencil(NeighRequest *rq)
|
||||
// require match of these request flags and mask bits
|
||||
// (!A != !B) is effectively a logical xor
|
||||
|
||||
if (!rq->intel != !(mask & NS_INTEL)) continue;
|
||||
if (!rq->ghost != !(mask & NS_GHOST)) continue;
|
||||
if (!rq->ssa != !(mask & NS_SSA)) continue;
|
||||
|
||||
|
||||
@ -302,8 +302,9 @@ namespace NeighConst {
|
||||
NS_ORTHO = 1 << 6,
|
||||
NS_TRI = 1 << 7,
|
||||
NS_GHOST = 1 << 8,
|
||||
NS_SSA = 1 << 9,
|
||||
NS_MULTI_OLD = 1 << 10
|
||||
NS_INTEL = 1 << 9,
|
||||
NS_SSA = 1 << 10,
|
||||
NS_MULTI_OLD = 1 << 11
|
||||
};
|
||||
|
||||
enum {
|
||||
|
||||
Reference in New Issue
Block a user