Add trim methods for INTEL

This commit is contained in:
Stan Gerald Moore
2022-07-12 09:58:00 -06:00
parent 1cc5b8aa95
commit be1396dab0
5 changed files with 554 additions and 0 deletions

View File

@ -0,0 +1,44 @@
// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (SNL)
------------------------------------------------------------------------- */
// Only used for hybrid to generate list for non-intel style. Use
// standard routines.
#ifdef NPAIR_CLASS
// clang-format off
NPairStyle(halffull/newtoff/trim/intel,
NPairHalffullNewtoffTrim,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_TRIM | NP_INTEL);
NPairStyle(halffull/newtoff/skip/trim/intel,
NPairHalffullNewtoffTrim,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
NPairStyle(halffull/newtoff/ghost/trim/intel,
NPairHalffullNewtoffTrim,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_INTEL);
NPairStyle(halffull/newtoff/skip/ghost/trim/intel,
NPairHalffullNewtoffTrim,
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_TRIM | NP_INTEL);
// clang-format on
#endif

View File

@ -0,0 +1,258 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (SNL)
------------------------------------------------------------------------- */
#include "npair_halffull_newton_trim_intel.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "modify.h"
#include "my_page.h"
#include "neigh_list.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NPairHalffullNewtonTrimIntel::NPairHalffullNewtonTrimIntel(LAMMPS *lmp) : NPair(lmp) {
_fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
}
/* ----------------------------------------------------------------------
build half list from full list and trim to shorter cutoff
pair stored once if i,j are both owned and i < j
if j is ghost, only store if j coords are "above and to the right" of i
works if full list is a skip list
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void NPairHalffullNewtonTrimIntel::build_t(NeighList *list,
IntelBuffers<flt_t,acc_t> *buffers)
{
const int inum_full = list->listfull->inum;
const int nlocal = atom->nlocal;
const int e_nall = nlocal + atom->nghost;
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const ilist = list->ilist;
int * _noalias const numneigh = list->numneigh;
int ** _noalias const firstneigh = list->firstneigh;
const int * _noalias const ilist_full = list->listfull->ilist;
const int * _noalias const numneigh_full = list->listfull->numneigh;
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
const double cutsq_custom = cutoff_custom * cutoff_custom;
#if defined(_OPENMP)
#pragma omp parallel
#endif
{
int tid, ifrom, ito;
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, comm->nthreads);
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over parent full list
for (int ii = ifrom; ii < ito; ii++) {
int n = 0;
int *neighptr = ipage.vget();
const int i = ilist_full[ii];
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
// loop over full neighbor list
const int * _noalias const jlist = firstneigh_full[i];
const int jnum = numneigh_full[i];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int joriginal = jlist[jj];
const int j = joriginal & NEIGHMASK;
int addme = 1;
if (j < nlocal) {
if (i > j) addme = 0;
} else {
if (x[j].z < ztmp) addme = 0;
if (x[j].z == ztmp) {
if (x[j].y < ytmp) addme = 0;
if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
}
}
// trim to shorter cutoff
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq > cutsq_custom) addme = 0;
if (addme)
neighptr[n++] = joriginal;
}
ilist[ii] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
int pad_end = n;
IP_PRE_neighbor_pad(pad_end, 0);
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
avg=INTEL_COMPILE_WIDTH/2
#endif
for ( ; n < pad_end; n++)
neighptr[n] = e_nall;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
}
list->inum = inum_full;
}
/* ----------------------------------------------------------------------
build half list from full 3-body list and trim to shorter cutoff
half list is already stored as first part of 3-body list
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void NPairHalffullNewtonTrimIntel::build_t3(NeighList *list, int *numhalf,
IntelBuffers<flt_t,acc_t> *buffers)
{
const int inum_full = list->listfull->inum;
const int e_nall = atom->nlocal + atom->nghost;
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const ilist = list->ilist;
int * _noalias const numneigh = list->numneigh;
int ** _noalias const firstneigh = list->firstneigh;
const int * _noalias const ilist_full = list->listfull->ilist;
const int * _noalias const numneigh_full = numhalf;
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
const double cutsq_custom = cutoff_custom * cutoff_custom;
int packthreads = 1;
if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads;
#if defined(_OPENMP)
#pragma omp parallel if (packthreads > 1)
#endif
{
int tid, ifrom, ito;
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, packthreads);
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over parent full list
for (int ii = ifrom; ii < ito; ii++) {
int n = 0;
int *neighptr = ipage.vget();
const int i = ilist_full[ii];
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
// loop over full neighbor list
const int * _noalias const jlist = firstneigh_full[i];
const int jnum = numneigh_full[ii];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int joriginal = jlist[jj];
const int j = joriginal & NEIGHMASK;
int addme = 1;
// trim to shorter cutoff
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq > cutsq_custom) addme = 0;
if (addme)
neighptr[n++] = joriginal;
}
ilist[ii] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
int pad_end = n;
IP_PRE_neighbor_pad(pad_end, 0);
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
avg=INTEL_COMPILE_WIDTH/2
#endif
for ( ; n < pad_end; n++)
neighptr[n] = e_nall;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
}
list->inum = inum_full;
}
/* ---------------------------------------------------------------------- */
void NPairHalffullNewtonTrimIntel::build(NeighList *list)
{
if (_fix->three_body_neighbor() == 0) {
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
build_t(list, _fix->get_mixed_buffers());
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
build_t(list, _fix->get_double_buffers());
else
build_t(list, _fix->get_single_buffers());
} else {
int *nhalf, *cnum;
if (_fix->precision() == FixIntel::PREC_MODE_MIXED) {
_fix->get_mixed_buffers()->get_list_data3(list->listfull, nhalf, cnum);
build_t3<float>(list, nhalf, _fix->get_mixed_buffers());
} else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
_fix->get_double_buffers()->get_list_data3(list->listfull, nhalf, cnum);
build_t3<double>(list, nhalf, _fix->get_double_buffers());
} else {
_fix->get_single_buffers()->get_list_data3(list->listfull, nhalf, cnum);
build_t3<float>(list, nhalf, _fix->get_single_buffers());
}
}
}

View File

@ -0,0 +1,61 @@
// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (SNL)
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
// clang-format off
NPairStyle(halffull/newton/trim/intel,
NPairHalffullNewtonTrimIntel,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI| NP_TRIM | NP_INTEL);
NPairStyle(halffull/newton/skip/trim/intel,
NPairHalffullNewtonTrimIntel,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
// clang-format on
#else
#ifndef LMP_NPAIR_HALFFULL_NEWTON_TRIM_INTEL_H
#define LMP_NPAIR_HALFFULL_NEWTON_TRIM_INTEL_H
#include "fix_intel.h"
#include "npair.h"
#if defined(_OPENMP)
#include <omp.h>
#endif
namespace LAMMPS_NS {
class NPairHalffullNewtonTrimIntel : public NPair {
public:
NPairHalffullNewtonTrimIntel(class LAMMPS *);
void build(class NeighList *) override;
protected:
FixIntel *_fix;
template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
template <class flt_t, class acc_t> void build_t3(NeighList *, int *, IntelBuffers<flt_t, acc_t> *);
};
} // namespace LAMMPS_NS
#endif
#endif

View File

@ -0,0 +1,138 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (SNL)
------------------------------------------------------------------------- */
#include "npair_trim_intel.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "modify.h"
#include "my_page.h"
#include "neigh_list.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NPairTrimIntel::NPairTrimIntel(LAMMPS *lmp) : NPair(lmp) {
_fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
}
/* ----------------------------------------------------------------------
trim from copy list to shorter cutoff
------------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void NPairTrimIntel::build_t(NeighList *list,
IntelBuffers<flt_t,acc_t> *buffers)
{
const int inum_copy = list->listcopy->inum;
const int nlocal = atom->nlocal;
const int e_nall = nlocal + atom->nghost;
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const ilist = list->ilist;
int * _noalias const numneigh = list->numneigh;
int ** _noalias const firstneigh = list->firstneigh;
const int * _noalias const ilist_copy = list->listcopy->ilist;
const int * _noalias const numneigh_copy = list->listcopy->numneigh;
const int ** _noalias const firstneigh_copy = (const int ** const)list->listcopy->firstneigh; // NOLINT
const double cutsq_custom = cutoff_custom * cutoff_custom;
#if defined(_OPENMP)
#pragma omp parallel
#endif
{
int tid, ifrom, ito;
IP_PRE_omp_range_id(ifrom, ito, tid, inum_copy, comm->nthreads);
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over parent copy list
for (int ii = ifrom; ii < ito; ii++) {
int n = 0;
int *neighptr = ipage.vget();
const int i = ilist_copy[ii];
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
// loop over copy neighbor list
const int * _noalias const jlist = firstneigh_copy[i];
const int jnum = numneigh_copy[i];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int joriginal = jlist[jj];
const int j = joriginal & NEIGHMASK;
int addme = 1;
// trim to shorter cutoff
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq > cutsq_custom) addme = 0;
if (addme)
neighptr[n++] = joriginal;
}
ilist[ii] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
int pad_end = n;
IP_PRE_neighbor_pad(pad_end, 0);
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
avg=INTEL_COMPILE_WIDTH/2
#endif
for ( ; n < pad_end; n++)
neighptr[n] = e_nall;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
}
list->inum = inum_copy;
}
/* ---------------------------------------------------------------------- */
void NPairTrimIntel::build(NeighList *list)
{
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
build_t(list, _fix->get_mixed_buffers());
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
build_t(list, _fix->get_double_buffers());
else
build_t(list, _fix->get_single_buffers());
}

View File

@ -0,0 +1,53 @@
// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Stan Moore (SNL)
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
// clang-format off
NPairStyle(trim/intel,
NPairTrimIntel,
NP_COPY | NP_TRIM | NP_INTEL);
// clang-format on
#else
#ifndef LMP_NPAIR_TRIM_INTEL_H
#define LMP_NPAIR_TRIM_INTEL_H
#include "fix_intel.h"
#include "npair.h"
#if defined(_OPENMP)
#include <omp.h>
#endif
namespace LAMMPS_NS {
class NPairTrimIntel : public NPair {
public:
NPairTrimIntel(class LAMMPS *);
void build(class NeighList *) override;
protected:
FixIntel *_fix;
template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
};
} // namespace LAMMPS_NS
#endif
#endif