Merge pull request #185 from akohlmey/new-neighbor

New neighbor list code with updates for USER-OMP and USER-DPD
This commit is contained in:
sjplimp
2016-12-13 16:24:41 -07:00
committed by GitHub
313 changed files with 23652 additions and 16330 deletions

2
src/.gitignore vendored
View File

@ -18,6 +18,8 @@
/*_tally.cpp
/*_rx.h
/*_rx.cpp
/*_ssa.h
/*_ssa.cpp
/kokkos.cpp
/kokkos.h

View File

@ -105,11 +105,16 @@ action modify_kokkos.cpp
action modify_kokkos.h
action neigh_bond_kokkos.cpp
action neigh_bond_kokkos.h
action neigh_full_kokkos.h
action neigh_list_kokkos.cpp
action neigh_list_kokkos.h
action neighbor_kokkos.cpp
action neighbor_kokkos.h
action npair_copy_kokkos.cpp
action npair_copy_kokkos.h
action npair_kokkos.cpp
action npair_kokkos.h
action nbin_kokkos.cpp
action nbin_kokkos.h
action math_special_kokkos.cpp
action math_special_kokkos.h
action pair_buck_coul_cut_kokkos.cpp

View File

@ -125,12 +125,10 @@ void FixQEqReaxKokkos<DeviceType>::init()
neighbor->requests[irequest]->pair = 0;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else { //if (neighflag == HALF || neighflag == HALFTHREAD)
neighbor->requests[irequest]->fix = 1;
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
}
}

View File

@ -168,7 +168,6 @@ void KokkosLMP::accelerator(int narg, char **arg)
else
neighflag = HALF;
} else if (strcmp(arg[iarg+1],"n2") == 0) neighflag = N2;
else if (strcmp(arg[iarg+1],"full/cluster") == 0) neighflag = FULLCLUSTER;
else error->all(FLERR,"Illegal package kokkos command");
iarg += 2;
} else if (strcmp(arg[iarg],"binsize") == 0) {
@ -232,20 +231,6 @@ void KokkosLMP::accelerator(int narg, char **arg)
called by Finish
------------------------------------------------------------------------- */
int KokkosLMP::neigh_list_kokkos(int m)
{
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
if (nk->lists_host[m] && nk->lists_host[m]->d_numneigh.dimension_0())
return 1;
if (nk->lists_device[m] && nk->lists_device[m]->d_numneigh.dimension_0())
return 1;
return 0;
}
/* ----------------------------------------------------------------------
called by Finish
------------------------------------------------------------------------- */
int KokkosLMP::neigh_count(int m)
{
int inum;
@ -255,28 +240,30 @@ int KokkosLMP::neigh_count(int m)
ArrayTypes<LMPHostType>::t_int_1d h_numneigh;
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
if (nk->lists_host[m]) {
inum = nk->lists_host[m]->inum;
if (nk->lists[m]->execution_space == Host) {
NeighListKokkos<LMPHostType>* nlistKK = (NeighListKokkos<LMPHostType>*) nk->lists[m];
inum = nlistKK->inum;
#ifndef KOKKOS_USE_CUDA_UVM
h_ilist = Kokkos::create_mirror_view(nk->lists_host[m]->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nk->lists_host[m]->d_numneigh);
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
#else
h_ilist = nk->lists_host[m]->d_ilist;
h_numneigh = nk->lists_host[m]->d_numneigh;
h_ilist = nlistKK->d_ilist;
h_numneigh = nlistKK->d_numneigh;
#endif
Kokkos::deep_copy(h_ilist,nk->lists_host[m]->d_ilist);
Kokkos::deep_copy(h_numneigh,nk->lists_host[m]->d_numneigh);
} else if (nk->lists_device[m]) {
inum = nk->lists_device[m]->inum;
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
} else if (nk->lists[m]->execution_space == Device) {
NeighListKokkos<LMPDeviceType>* nlistKK = (NeighListKokkos<LMPDeviceType>*) nk->lists[m];
inum = nlistKK->inum;
#ifndef KOKKOS_USE_CUDA_UVM
h_ilist = Kokkos::create_mirror_view(nk->lists_device[m]->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nk->lists_device[m]->d_numneigh);
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
#else
h_ilist = nk->lists_device[m]->d_ilist;
h_numneigh = nk->lists_device[m]->d_numneigh;
h_ilist = nlistKK->d_ilist;
h_numneigh = nlistKK->d_numneigh;
#endif
Kokkos::deep_copy(h_ilist,nk->lists_device[m]->d_ilist);
Kokkos::deep_copy(h_numneigh,nk->lists_device[m]->d_numneigh);
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
}
for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]];

View File

@ -34,7 +34,6 @@ class KokkosLMP : protected Pointers {
KokkosLMP(class LAMMPS *, int, char **);
~KokkosLMP();
void accelerator(int, char **);
int neigh_list_kokkos(int);
int neigh_count(int);
private:
static void my_signal_handler(int);

145
src/KOKKOS/nbin_kokkos.cpp Normal file
View File

@ -0,0 +1,145 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "nbin_kokkos.h"
#include "neighbor.h"
#include "atom_kokkos.h"
#include "group.h"
#include "domain.h"
#include "comm.h"
#include "update.h"
#include "error.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
enum{NSQ,BIN,MULTI}; // also in Neighbor
#define SMALL 1.0e-6
#define CUT2BIN_RATIO 100
/* ---------------------------------------------------------------------- */
template<class DeviceType>
NBinKokkos<DeviceType>::NBinKokkos(LAMMPS *lmp) : NBinStandard(lmp) {
atoms_per_bin = 16;
d_resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(d_resize);
#else
h_resize = d_resize;
#endif
h_resize() = 1;
}
/* ----------------------------------------------------------------------
setup neighbor binning geometry
bin numbering in each dimension is global:
0 = 0.0 to binsize, 1 = binsize to 2*binsize, etc
nbin-1,nbin,etc = bbox-binsize to bbox, bbox to bbox+binsize, etc
-1,-2,etc = -binsize to 0.0, -2*binsize to -binsize, etc
code will work for any binsize
since next(xyz) and stencil extend as far as necessary
binsize = 1/2 of cutoff is roughly optimal
for orthogonal boxes:
a dim must be filled exactly by integer # of bins
in periodic, procs on both sides of PBC must see same bin boundary
in non-periodic, coord2bin() still assumes this by use of nbin xyz
for triclinic boxes:
tilted simulation box cannot contain integer # of bins
stencil & neigh list built differently to account for this
mbinlo = lowest global bin any of my ghost atoms could fall into
mbinhi = highest global bin any of my ghost atoms could fall into
mbin = number of bins I need in a dimension
------------------------------------------------------------------------- */
template<class DeviceType>
void NBinKokkos<DeviceType>::bin_atoms_setup(int nall)
{
if (mbins > k_bins.d_view.dimension_0()) {
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",mbins,atoms_per_bin);
bins = k_bins.view<DeviceType>();
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins);
bincount = k_bincount.view<DeviceType>();
last_bin_memory = update->ntimestep;
}
last_bin = update->ntimestep;
}
/* ----------------------------------------------------------------------
bin owned and ghost atoms
------------------------------------------------------------------------- */
template<class DeviceType>
void NBinKokkos<DeviceType>::bin_atoms()
{
h_resize() = 1;
while(h_resize() > 0) {
h_resize() = 0;
deep_copy(d_resize, h_resize);
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
x = atomKK->k_x.view<DeviceType>();
bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2];
bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2];
NPairKokkosBinAtomsFunctor<DeviceType> f(*this);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(h_resize, d_resize);
if(h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
bins = k_bins.view<DeviceType>();
c_bins = bins;
last_bin_memory = update->ntimestep;
}
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void NBinKokkos<DeviceType>::binatomsItem(const int &i) const
{
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
if(ac < bins.dimension_1()) {
bins(ibin, ac) = i;
} else {
d_resize() = 1;
}
}
namespace LAMMPS_NS {
template class NBinKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class NBinKokkos<LMPHostType>;
#endif
}

153
src/KOKKOS/nbin_kokkos.h Normal file
View File

@ -0,0 +1,153 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NBIN_CLASS
NBinStyle(kk/host,
NBinKokkos<LMPHostType>,
NB_KOKKOS_HOST)
NBinStyle(kk/device,
NBinKokkos<LMPDeviceType>,
NB_KOKKOS_DEVICE)
#else
#ifndef LMP_NBIN_KOKKOS_H
#define LMP_NBIN_KOKKOS_H
#include "nbin_standard.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
template<class DeviceType>
class NBinKokkos : public NBinStandard {
public:
typedef ArrayTypes<DeviceType> AT;
NBinKokkos(class LAMMPS *);
~NBinKokkos() {}
void bin_atoms_setup(int);
void bin_atoms();
int atoms_per_bin;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
typename AT::t_int_scalar d_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename AT::t_x_array_randomread x;
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi_[0])
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
else if (x >= bboxlo_[0]) {
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
if (y >= bboxhi_[1])
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
else if (y >= bboxlo_[1]) {
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
if (z >= bboxhi_[2])
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
else if (z >= bboxlo_[2]) {
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
if (x >= bboxhi_[0])
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
else if (x >= bboxlo_[0]) {
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
if (y >= bboxhi_[1])
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
else if (y >= bboxlo_[1]) {
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
if (z >= bboxhi_[2])
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
else if (z >= bboxlo_[2]) {
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
private:
double bboxlo_[3],bboxhi_[3];
};
template<class DeviceType>
struct NPairKokkosBinAtomsFunctor {
typedef DeviceType device_type;
const NBinKokkos<DeviceType> c;
NPairKokkosBinAtomsFunctor(const NBinKokkos<DeviceType> &_c):
c(_c) {};
~NPairKokkosBinAtomsFunctor() {}
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.binatomsItem(i);
}
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -34,9 +34,8 @@ void NeighListKokkos<Device>::clean_copy()
ipage = NULL;
dpage = NULL;
maxstencil = 0;
ghostflag = 0;
maxstencil_multi = 0;
maxatoms = 0;
}
/* ---------------------------------------------------------------------- */
@ -70,49 +69,6 @@ void NeighListKokkos<Device>::grow(int nmax)
/* ---------------------------------------------------------------------- */
template<class Device>
void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
{
int i;
if (style == BIN) {
if (smax > maxstencil) {
maxstencil = smax;
d_stencil =
memory->create_kokkos(d_stencil,h_stencil,stencil,maxstencil,
"neighlist:stencil");
if (ghostflag) {
memory->create_kokkos(d_stencilxyz,h_stencilxyz,stencilxyz,maxstencil,
3,"neighlist:stencilxyz");
}
}
} else {
int n = atom->ntypes;
if (maxstencil_multi == 0) {
nstencil_multi = new int[n+1];
stencil_multi = new int*[n+1];
distsq_multi = new double*[n+1];
for (i = 1; i <= n; i++) {
nstencil_multi[i] = 0;
stencil_multi[i] = NULL;
distsq_multi[i] = NULL;
}
}
if (smax > maxstencil_multi) {
maxstencil_multi = smax;
for (i = 1; i <= n; i++) {
memory->destroy(stencil_multi[i]);
memory->destroy(distsq_multi[i]);
memory->create(stencil_multi[i],maxstencil_multi,
"neighlist:stencil_multi");
memory->create(distsq_multi[i],maxstencil_multi,
"neighlist:distsq_multi");
}
}
}
}
namespace LAMMPS_NS {
template class NeighListKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA

View File

@ -20,7 +20,7 @@
namespace LAMMPS_NS {
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u,FULLCLUSTER=16u};
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u};
class AtomNeighbors
{
@ -74,14 +74,12 @@ public:
typename DAT::tdual_int_1d k_ilist; // local indices of I atoms
typename ArrayTypes<Device>::t_int_1d d_ilist;
typename ArrayTypes<Device>::t_int_1d d_numneigh; // # of J neighs for each I
typename ArrayTypes<Device>::t_int_1d d_stencil; // # of J neighs for each I
typename ArrayTypes<LMPHostType>::t_int_1d h_stencil; // # of J neighs per I
typename ArrayTypes<Device>::t_int_1d_3 d_stencilxyz;
typename ArrayTypes<LMPHostType>::t_int_1d_3 h_stencilxyz;
NeighListKokkos(class LAMMPS *lmp):
NeighList(lmp) {_stride = 1; maxneighs = 16;};
~NeighListKokkos() {stencil = NULL; numneigh = NULL; ilist = NULL;};
NeighList(lmp) {_stride = 1; maxneighs = 16; kokkos = 1; maxatoms = 0;
execution_space = ExecutionSpaceFromDevice<Device>::space;
};
~NeighListKokkos() {numneigh = NULL; ilist = NULL;};
KOKKOS_INLINE_FUNCTION
AtomNeighbors get_neighbors(const int &i) const {
@ -99,7 +97,8 @@ public:
int& num_neighs(const int & i) const {
return d_numneigh(i);
}
void stencil_allocate(int smax, int style);
private:
int maxatoms;
};
}

View File

@ -1,4 +1,4 @@
;/* ----------------------------------------------------------------------
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
@ -26,6 +26,10 @@
#include "angle.h"
#include "dihedral.h"
#include "improper.h"
#include "style_nbin.h"
#include "style_nstencil.h"
#include "style_npair.h"
#include "style_ntopo.h"
using namespace LAMMPS_NS;
@ -36,18 +40,11 @@ enum{NSQ,BIN,MULTI}; // also in neigh_list.cpp
NeighborKokkos::NeighborKokkos(LAMMPS *lmp) : Neighbor(lmp),
neighbond_host(lmp),neighbond_device(lmp)
{
atoms_per_bin = 16;
nlist_host = 0;
lists_host = NULL;
pair_build_host = NULL;
stencil_create_host = NULL;
nlist_device = 0;
lists_device = NULL;
pair_build_device = NULL;
stencil_create_device = NULL;
device_flag = 0;
bondlist = NULL;
anglelist = NULL;
dihedrallist = NULL;
improperlist = NULL;
}
/* ---------------------------------------------------------------------- */
@ -58,14 +55,6 @@ NeighborKokkos::~NeighborKokkos()
memory->destroy_kokkos(k_cutneighsq,cutneighsq);
cutneighsq = NULL;
for (int i = 0; i < nlist_host; i++) delete lists_host[i];
delete [] lists_host;
for (int i = 0; i < nlist_device; i++) delete lists_device[i];
delete [] lists_device;
delete [] pair_build_device;
delete [] pair_build_host;
memory->destroy_kokkos(k_ex_type,ex_type);
memory->destroy_kokkos(k_ex1_type,ex1_type);
memory->destroy_kokkos(k_ex2_type,ex2_type);
@ -89,6 +78,11 @@ void NeighborKokkos::init()
{
atomKK = (AtomKokkos *) atom;
Neighbor::init();
// 1st time allocation of xhold
if (dist_check)
xhold = DAT::tdual_x_array("neigh:xhold",maxhold);
}
/* ---------------------------------------------------------------------- */
@ -101,158 +95,16 @@ void NeighborKokkos::init_cutneighsq_kokkos(int n)
/* ---------------------------------------------------------------------- */
int NeighborKokkos::init_lists_kokkos()
{
int i;
for (i = 0; i < nlist_host; i++) delete lists_host[i];
delete [] lists_host;
delete [] pair_build_host;
delete [] stencil_create_host;
nlist_host = 0;
for (i = 0; i < nlist_device; i++) delete lists_device[i];
delete [] lists_device;
delete [] pair_build_device;
delete [] stencil_create_device;
nlist_device = 0;
nlist = 0;
for (i = 0; i < nrequest; i++) {
if (requests[i]->kokkos_device) nlist_device++;
else if (requests[i]->kokkos_host) nlist_host++;
else nlist++;
}
lists_host = new NeighListKokkos<LMPHostType>*[nrequest];
pair_build_host = new PairPtrHost[nrequest];
stencil_create_host = new StencilPtrHost[nrequest];
for (i = 0; i < nrequest; i++) {
lists_host[i] = NULL;
pair_build_host[i] = NULL;
stencil_create_host[i] = NULL;
}
for (i = 0; i < nrequest; i++) {
if (!requests[i]->kokkos_host) continue;
lists_host[i] = new NeighListKokkos<LMPHostType>(lmp);
lists_host[i]->index = i;
lists_host[i]->dnum = requests[i]->dnum;
if (requests[i]->pair) {
Pair *pair = (Pair *) requests[i]->requestor;
pair->init_list(requests[i]->id,lists_host[i]);
}
if (requests[i]->fix) {
Fix *fix = (Fix *) requests[i]->requestor;
fix->init_list(requests[i]->id,lists_host[i]);
}
}
lists_device = new NeighListKokkos<LMPDeviceType>*[nrequest];
pair_build_device = new PairPtrDevice[nrequest];
stencil_create_device = new StencilPtrDevice[nrequest];
for (i = 0; i < nrequest; i++) {
lists_device[i] = NULL;
pair_build_device[i] = NULL;
stencil_create_device[i] = NULL;
}
for (i = 0; i < nrequest; i++) {
if (!requests[i]->kokkos_device) continue;
lists_device[i] = new NeighListKokkos<LMPDeviceType>(lmp);
lists_device[i]->index = i;
lists_device[i]->dnum = requests[i]->dnum;
if (requests[i]->pair) {
Pair *pair = (Pair *) requests[i]->requestor;
pair->init_list(requests[i]->id,lists_device[i]);
}
if (requests[i]->fix) {
Fix *fix = (Fix *) requests[i]->requestor;
fix->init_list(requests[i]->id,lists_device[i]);
}
}
// 1st time allocation of xhold
if (dist_check)
xhold = DAT::tdual_x_array("neigh:xhold",maxhold);
// return # of non-Kokkos lists
return nlist;
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_flags1_kokkos(int i)
void NeighborKokkos::create_kokkos_list(int i)
{
if (style != BIN)
error->all(FLERR,"KOKKOS package only supports 'bin' neighbor lists");
if (lists_host[i]) {
lists_host[i]->buildflag = 1;
if (pair_build_host[i] == NULL) lists_host[i]->buildflag = 0;
if (requests[i]->occasional) lists_host[i]->buildflag = 0;
lists_host[i]->growflag = 1;
if (requests[i]->copy) lists_host[i]->growflag = 0;
lists_host[i]->stencilflag = 1;
if (style == NSQ) lists_host[i]->stencilflag = 0;
if (stencil_create[i] == NULL) lists_host[i]->stencilflag = 0;
lists_host[i]->ghostflag = 0;
if (requests[i]->ghost) lists_host[i]->ghostflag = 1;
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
}
if (lists_device[i]) {
lists_device[i]->buildflag = 1;
if (pair_build_device[i] == NULL) lists_device[i]->buildflag = 0;
if (requests[i]->occasional) lists_device[i]->buildflag = 0;
lists_device[i]->growflag = 1;
if (requests[i]->copy) lists_device[i]->growflag = 0;
lists_device[i]->stencilflag = 1;
if (style == NSQ) lists_device[i]->stencilflag = 0;
if (stencil_create[i] == NULL) lists_device[i]->stencilflag = 0;
lists_device[i]->ghostflag = 0;
if (requests[i]->ghost) lists_device[i]->ghostflag = 1;
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_flags2_kokkos(int i)
{
if (lists_host[i]) {
if (lists_host[i]->buildflag) blist[nblist++] = i;
if (lists_host[i]->growflag && requests[i]->occasional == 0)
glist[nglist++] = i;
if (lists_host[i]->stencilflag && requests[i]->occasional == 0)
slist[nslist++] = i;
}
if (lists_device[i]) {
if (lists_device[i]->buildflag) blist[nblist++] = i;
if (lists_device[i]->growflag && requests[i]->occasional == 0)
glist[nglist++] = i;
if (lists_device[i]->stencilflag && requests[i]->occasional == 0)
slist[nslist++] = i;
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_grow_kokkos(int i)
{
if (lists_host[i]!=NULL && lists_host[i]->growflag)
lists_host[i]->grow(maxatom);
if (lists_device[i]!=NULL && lists_device[i]->growflag)
lists_device[i]->grow(maxatom);
if (requests[i]->kokkos_device) {
lists[i] = new NeighListKokkos<LMPDeviceType>(lmp);
device_flag = 1;
} else if (requests[i]->kokkos_host)
lists[i] = new NeighListKokkos<LMPHostType>(lmp);
}
/* ---------------------------------------------------------------------- */
@ -281,49 +133,6 @@ void NeighborKokkos::init_ex_mol_bit_kokkos()
k_ex_mol_bit.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::choose_build(int index, NeighRequest *rq)
{
if (rq->kokkos_host != 0) {
PairPtrHost pb = NULL;
if (rq->ghost) {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPHostType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0,1>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1,1>;
} else {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPHostType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0,0>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1,0>;
}
pair_build_host[index] = pb;
}
if (rq->kokkos_device != 0) {
PairPtrDevice pb = NULL;
if (rq->ghost) {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0,1>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1,1>;
} else {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0,0>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1,0>;
}
pair_build_device[index] = pb;
return;
}
Neighbor::choose_build(index,rq);
}
/* ----------------------------------------------------------------------
if any atom moved trigger distance (half of neighbor skin) return 1
shrink trigger distance if box size has changed
@ -337,7 +146,7 @@ void NeighborKokkos::choose_build(int index, NeighRequest *rq)
int NeighborKokkos::check_distance()
{
if (nlist_device)
if (device_flag)
check_distance_kokkos<LMPDeviceType>();
else
check_distance_kokkos<LMPHostType>();
@ -417,7 +226,7 @@ void NeighborKokkos::operator()(TagNeighborCheckDistance<DeviceType>, const int
void NeighborKokkos::build(int topoflag)
{
if (nlist_device)
if (device_flag)
build_kokkos<LMPDeviceType>(topoflag);
else
build_kokkos<LMPHostType>(topoflag);
@ -428,18 +237,25 @@ void NeighborKokkos::build_kokkos(int topoflag)
{
typedef DeviceType device_type;
int i;
int i,m;
ago = 0;
ncalls++;
lastcall = update->ntimestep;
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
// check that using special bond flags will not overflow neigh lists
if (nall > NEIGHMASK)
error->one(FLERR,"Too many local+ghost atoms for neighbor list");
// store current atom positions and box size if needed
if (dist_check) {
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
x = atomKK->k_x;
int nlocal = atom->nlocal;
if (includegroup) nlocal = atom->nfirst;
int maxhold_kokkos = xhold.view<DeviceType>().dimension_0();
if (atom->nmax > maxhold || maxhold_kokkos < maxhold) {
@ -471,54 +287,33 @@ void NeighborKokkos::build_kokkos(int topoflag)
}
}
// if any lists store neighbors of ghosts:
// invoke grow() if nlocal+nghost exceeds previous list size
// else only invoke grow() if nlocal exceeds previous list size
// only for lists with growflag set and which are perpetual (glist)
// bin atoms for all NBin instances
// not just NBin associated with perpetual lists
// b/c cannot wait to bin occasional lists in build_one() call
// if bin then, atoms may have moved outside of proc domain & bin extent,
// leading to errors or even a crash
if (anyghostlist && atom->nmax > maxatom) {
maxatom = atom->nmax;
for (i = 0; i < nglist; i++)
if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
else init_list_grow_kokkos(glist[i]);
} else if (atom->nmax > maxatom) {
maxatom = atom->nmax;
for (i = 0; i < nglist; i++)
if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
else init_list_grow_kokkos(glist[i]);
if (style != NSQ) {
for (int i = 0; i < nbin; i++) {
neigh_bin[i]->bin_atoms_setup(nall);
neigh_bin[i]->bin_atoms();
}
}
// extend atom bin list if necessary
// build pairwise lists for all perpetual NPair/NeighList
// grow() with nlocal/nall args so that only realloc if have to
if (style != NSQ && atom->nmax > maxbin) {
maxbin = atom->nmax;
memory->destroy(bins);
memory->create(bins,maxbin,"bins");
}
// check that using special bond flags will not overflow neigh lists
if (atom->nlocal+atom->nghost > NEIGHMASK)
error->one(FLERR,"Too many local+ghost atoms for neighbor list");
// invoke building of pair and molecular topology neighbor lists
// only for pairwise lists with buildflag set
// blist is for standard neigh lists, otherwise is a Kokkos list
for (i = 0; i < nblist; i++) {
if (lists[blist[i]]) {
atomKK->sync(Host,ALL_MASK);
(this->*pair_build[blist[i]])(lists[blist[i]]);
} else {
if (lists_host[blist[i]])
(this->*pair_build_host[blist[i]])(lists_host[blist[i]]);
else if (lists_device[blist[i]])
(this->*pair_build_device[blist[i]])(lists_device[blist[i]]);
}
for (i = 0; i < npair_perpetual; i++) {
m = plist[i];
lists[m]->grow(nlocal,nall);
neigh_pair[m]->build_setup();
neigh_pair[m]->build(lists[m]);
}
if (atom->molecular && topoflag)
build_topology_kokkos();
// build topology lists for bonds/angles/etc
if (atom->molecular && topoflag) build_topology();
}
template<class DeviceType>
@ -532,26 +327,6 @@ void NeighborKokkos::operator()(TagNeighborXhold<DeviceType>, const int &i) cons
/* ---------------------------------------------------------------------- */
void NeighborKokkos::setup_bins_kokkos(int i)
{
if (lists_host[slist[i]]) {
lists_host[slist[i]]->stencil_allocate(smax,style);
(this->*stencil_create[slist[i]])(lists_host[slist[i]],sx,sy,sz);
} else if (lists_device[slist[i]]) {
lists_device[slist[i]]->stencil_allocate(smax,style);
(this->*stencil_create[slist[i]])(lists_device[slist[i]],sx,sy,sz);
}
//if (i < nslist-1) return; // this won't work if a non-kokkos neighbor list is last
if (maxhead > k_bins.d_view.dimension_0()) {
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",maxhead,atoms_per_bin);
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",maxhead);
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_ex_type_grow_kokkos(){
memory->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type");
k_ex1_type.modify<LMPHostType>();
@ -575,8 +350,8 @@ void NeighborKokkos::modify_mol_group_grow_kokkos(){
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_topology_kokkos() {
if (nlist_device) {
void NeighborKokkos::init_topology() {
if (device_flag) {
neighbond_device.init_topology_kk();
} else {
neighbond_host.init_topology_kk();
@ -588,8 +363,8 @@ void NeighborKokkos::init_topology_kokkos() {
normally built with pair lists, but USER-CUDA separates them
------------------------------------------------------------------------- */
void NeighborKokkos::build_topology_kokkos() {
if (nlist_device) {
void NeighborKokkos::build_topology() {
if (device_flag) {
neighbond_device.build_topology_kk();
k_bondlist = neighbond_device.k_bondlist;
@ -637,7 +412,3 @@ void NeighborKokkos::build_topology_kokkos() {
k_improperlist.modify<LMPHostType>();
}
}
// include to trigger instantiation of templated functions
#include "neigh_full_kokkos.h"

View File

@ -22,316 +22,6 @@
namespace LAMMPS_NS {
template<class Device>
class NeighborKokkosExecute
{
typedef ArrayTypes<Device> AT;
public:
NeighListKokkos<Device> neigh_list;
const typename AT::t_xfloat_2d_randomread cutneighsq;
const typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
const typename AT::t_x_array_randomread x;
const typename AT::t_int_1d_const type,mask,molecule;
const typename AT::t_tagint_1d_const tag;
const typename AT::t_tagint_2d_const special;
const typename AT::t_int_2d_const nspecial;
const int molecular;
int moltemplate;
int special_flag[4];
const int nbinx,nbiny,nbinz;
const int mbinx,mbiny,mbinz;
const int mbinxlo,mbinylo,mbinzlo;
const X_FLOAT bininvx,bininvy,bininvz;
X_FLOAT bboxhi[3],bboxlo[3];
const int nlocal;
const int exclude;
const int nex_type;
const int maxex_type;
const typename AT::t_int_1d_const ex1_type,ex2_type;
const typename AT::t_int_2d_const ex_type;
const int nex_group;
const int maxex_group;
const typename AT::t_int_1d_const ex1_group,ex2_group;
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
const int nex_mol;
const int maxex_mol;
const typename AT::t_int_1d_const ex_mol_group;
const typename AT::t_int_1d_const ex_mol_bit;
typename AT::t_int_scalar resize;
typename AT::t_int_scalar new_maxneighs;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
const int xperiodic, yperiodic, zperiodic;
const int xprd_half, yprd_half, zprd_half;
NeighborKokkosExecute(
const NeighListKokkos<Device> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
const typename AT::t_int_1d &_bincount,
const typename AT::t_int_2d &_bins,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_int_1d_const &_molecule,
const typename AT::t_tagint_1d_const &_tag,
const typename AT::t_tagint_2d_const &_special,
const typename AT::t_int_2d_const &_nspecial,
const int &_molecular,
const int & _nbinx,const int & _nbiny,const int & _nbinz,
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const int & _exclude,const int & _nex_type,const int & _maxex_type,
const typename AT::t_int_1d_const & _ex1_type,
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
const int & _nex_group,const int & _maxex_group,
const typename AT::t_int_1d_const & _ex1_group,
const typename AT::t_int_1d_const & _ex2_group,
const typename AT::t_int_1d_const & _ex1_bit,
const typename AT::t_int_1d_const & _ex2_bit,
const int & _nex_mol,const int & _maxex_mol,
const typename AT::t_int_1d_const & _ex_mol_group,
const typename AT::t_int_1d_const & _ex_mol_bit,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
nlocal(_nlocal),
x(_x),type(_type),mask(_mask),molecule(_molecule),
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type),
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
nex_group(_nex_group),maxex_group(_maxex_group),
ex1_group(_ex1_group),ex2_group(_ex2_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol),
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half){
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(resize);
#else
h_resize = resize;
#endif
h_resize() = 1;
new_maxneighs = typename AT::
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
#ifndef KOKKOS_USE_CUDA_UVM
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
#else
h_new_maxneighs = new_maxneighs;
#endif
h_new_maxneighs() = neigh_list.maxneighs;
};
~NeighborKokkosExecute() {neigh_list.clean_copy();};
template<int HalfNeigh, int GhostNewton>
KOKKOS_FUNCTION
void build_Item(const int &i) const;
template<int HalfNeigh>
KOKKOS_FUNCTION
void build_Item_Ghost(const int &i) const;
template<int ClusterSize>
KOKKOS_FUNCTION
void build_cluster_Item(const int &i) const;
#ifdef KOKKOS_HAVE_CUDA
template<int HalfNeigh, int GhostNewton>
__device__ inline
void build_ItemCuda(typename Kokkos::TeamPolicy<Device>::member_type dev) const;
#endif
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
KOKKOS_INLINE_FUNCTION
int find_special(const int &i, const int &j) const;
KOKKOS_INLINE_FUNCTION
int minimum_image_check(double dx, double dy, double dz) const {
if (xperiodic && fabs(dx) > xprd_half) return 1;
if (yperiodic && fabs(dy) > yprd_half) return 1;
if (zperiodic && fabs(dz) > zprd_half) return 1;
return 0;
}
};
template<class Device>
struct NeighborKokkosBinAtomsFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
NeighborKokkosBinAtomsFunctor(const NeighborKokkosExecute<Device> &_c):
c(_c) {};
~NeighborKokkosBinAtomsFunctor() {}
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.binatomsItem(i);
}
};
template<class Device,int HALF_NEIGH,int GHOST_NEWTON>
struct NeighborKokkosBuildFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
#ifdef KOKKOS_HAVE_CUDA
KOKKOS_INLINE_FUNCTION
void operator() (typename Kokkos::TeamPolicy<Device>::member_type dev) const {
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
}
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
#endif
};
template<class Device,int HALF_NEIGH>
struct NeighborKokkosBuildFunctorGhost {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborKokkosBuildFunctorGhost(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item_Ghost<HALF_NEIGH>(i);
}
};
template<class Device,int ClusterSize>
struct NeighborClusterKokkosBuildFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborClusterKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_cluster_Item<ClusterSize>(i);
}
};
template<class DeviceType>
struct TagNeighborCheckDistance{};
@ -342,24 +32,11 @@ class NeighborKokkos : public Neighbor {
public:
typedef int value_type;
int nlist_host; // pairwise neighbor lists on Host
NeighListKokkos<LMPHostType> **lists_host;
int nlist_device; // pairwise neighbor lists on Device
NeighListKokkos<LMPDeviceType> **lists_device;
NeighBondKokkos<LMPHostType> neighbond_host;
NeighBondKokkos<LMPDeviceType> neighbond_device;
DAT::tdual_int_2d k_bondlist;
DAT::tdual_int_2d k_anglelist;
DAT::tdual_int_2d k_dihedrallist;
DAT::tdual_int_2d k_improperlist;
NeighborKokkos(class LAMMPS *);
~NeighborKokkos();
void init();
void init_topology();
void build_topology();
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
@ -369,11 +46,7 @@ class NeighborKokkos : public Neighbor {
KOKKOS_INLINE_FUNCTION
void operator()(TagNeighborXhold<DeviceType>, const int&) const;
private:
int atoms_per_bin;
DAT::tdual_xfloat_2d k_cutneighsq;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
DAT::tdual_int_2d k_ex_type;
@ -382,6 +55,16 @@ class NeighborKokkos : public Neighbor {
DAT::tdual_int_1d k_ex_mol_group;
DAT::tdual_int_1d k_ex_mol_bit;
NeighBondKokkos<LMPHostType> neighbond_host;
NeighBondKokkos<LMPDeviceType> neighbond_device;
DAT::tdual_int_2d k_bondlist;
DAT::tdual_int_2d k_anglelist;
DAT::tdual_int_2d k_dihedrallist;
DAT::tdual_int_2d k_improperlist;
private:
DAT::tdual_x_array x;
DAT::tdual_x_array xhold;
@ -389,14 +72,10 @@ class NeighborKokkos : public Neighbor {
int device_flag;
void init_cutneighsq_kokkos(int);
int init_lists_kokkos();
void init_list_flags1_kokkos(int);
void init_list_flags2_kokkos(int);
void init_list_grow_kokkos(int);
void create_kokkos_list(int);
void init_ex_type_kokkos(int);
void init_ex_bit_kokkos();
void init_ex_mol_bit_kokkos();
void choose_build(int, NeighRequest *);
virtual int check_distance();
template<class DeviceType> int check_distance_kokkos();
virtual void build(int);
@ -405,27 +84,6 @@ class NeighborKokkos : public Neighbor {
void modify_ex_type_grow_kokkos();
void modify_ex_group_grow_kokkos();
void modify_mol_group_grow_kokkos();
void init_topology_kokkos();
void build_topology_kokkos();
typedef void (NeighborKokkos::*PairPtrHost)
(class NeighListKokkos<LMPHostType> *);
PairPtrHost *pair_build_host;
typedef void (NeighborKokkos::*PairPtrDevice)
(class NeighListKokkos<LMPDeviceType> *);
PairPtrDevice *pair_build_device;
template<class DeviceType,int HALF_NEIGH, int GHOST>
void full_bin_kokkos(NeighListKokkos<DeviceType> *list);
template<class DeviceType>
void full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list);
typedef void (NeighborKokkos::*StencilPtrHost)
(class NeighListKokkos<LMPHostType> *, int, int, int);
StencilPtrHost *stencil_create_host;
typedef void (NeighborKokkos::*StencilPtrDevice)
(class NeighListKokkos<LMPDeviceType> *, int, int, int);
StencilPtrDevice *stencil_create_device;
};
}

View File

@ -0,0 +1,62 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "npair_copy_kokkos.h"
#include "neighbor.h"
#include "neigh_list_kokkos.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "domain.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
template<class DeviceType>
NPairCopyKokkos<DeviceType>::NPairCopyKokkos(LAMMPS *lmp) : NPair(lmp) {}
/* ----------------------------------------------------------------------
create list which is simply a copy of parent list
------------------------------------------------------------------------- */
template<class DeviceType>
void NPairCopyKokkos<DeviceType>::build(NeighList *list)
{
NeighList *listcopy = list->listcopy;
list->inum = listcopy->inum;
list->gnum = listcopy->gnum;
list->ilist = listcopy->ilist;
list->numneigh = listcopy->numneigh;
list->firstneigh = listcopy->firstneigh;
list->firstdouble = listcopy->firstdouble;
list->ipage = listcopy->ipage;
list->dpage = listcopy->dpage;
NeighListKokkos<DeviceType>* list_kk = (NeighListKokkos<DeviceType>*) list;
NeighListKokkos<DeviceType>* listcopy_kk = (NeighListKokkos<DeviceType>*) list->listcopy;
list_kk->d_ilist = listcopy_kk->d_ilist;
list_kk->d_numneigh = listcopy_kk->d_numneigh;
list_kk->d_neighbors = listcopy_kk->d_neighbors;
}
namespace LAMMPS_NS {
template class NPairCopyKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class NPairCopyKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,48 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(copy/kk/device,
NPairCopyKokkos<LMPDeviceType>,
NP_COPY | NP_KOKKOS_DEVICE)
NPairStyle(copy/kk/host,
NPairCopyKokkos<LMPHostType>,
NP_COPY | NP_KOKKOS_HOST)
#else
#ifndef LMP_NPAIR_COPY_KOKKOS_H
#define LMP_NPAIR_COPY_KOKKOS_H
#include "npair.h"
namespace LAMMPS_NS {
template<class DeviceType>
class NPairCopyKokkos : public NPair {
public:
NPairCopyKokkos(class LAMMPS *);
~NPairCopyKokkos() {}
void build(class NeighList *);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -11,17 +11,105 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "npair_kokkos.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "domain_kokkos.h"
#include "neighbor_kokkos.h"
#include "nbin_kokkos.h"
#include "nstencil.h"
#include "force.h"
namespace LAMMPS_NS {
/* ---------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) {
}
/* ----------------------------------------------------------------------
copy needed info from Neighbor class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_neighbor_info()
{
NPair::copy_neighbor_info();
NeighborKokkos* neighborKK = (NeighborKokkos*) neighbor;
// general params
newton_pair = force->newton_pair;
k_cutneighsq = neighborKK->k_cutneighsq;
// exclusion info
k_ex1_type = neighborKK->k_ex1_type;
k_ex2_type = neighborKK->k_ex2_type;
k_ex_type = neighborKK->k_ex_type;
k_ex1_group = neighborKK->k_ex1_group;
k_ex2_group = neighborKK->k_ex2_group;
k_ex1_bit = neighborKK->k_ex1_bit;
k_ex2_bit = neighborKK->k_ex2_bit;
k_ex_mol_group = neighborKK->k_ex_mol_group;
k_ex_mol_bit = neighborKK->k_ex_mol_bit;
}
/* ----------------------------------------------------------------------
copy per-atom and per-bin vectors from NBin class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_bin_info()
{
NPair::copy_bin_info();
NBinKokkos<DeviceType>* nbKK = (NBinKokkos<DeviceType>*) nb;
atoms_per_bin = nbKK->atoms_per_bin;
k_bincount = nbKK->k_bincount;
k_bins = nbKK->k_bins;
}
/* ----------------------------------------------------------------------
copy needed info from NStencil class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_stencil_info()
{
NPair::copy_stencil_info();
nstencil = ns->nstencil;
int maxstencil = ns->get_maxstencil();
k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
for (int k = 0; k < maxstencil; k++)
k_stencil.h_view(k) = ns->stencil[k];
k_stencil.modify<LMPHostType>();
k_stencil.sync<DeviceType>();
if (GHOST) {
k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
for (int k = 0; k < maxstencil; k++) {
k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];
k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2];
}
k_stencilxyz.modify<LMPHostType>();
k_stencilxyz.sync<DeviceType>();
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::build(NeighList *list_)
{
NeighListKokkos<DeviceType>* list = (NeighListKokkos<DeviceType>*) list_;
const int nlocal = includegroup?atom->nfirst:atom->nlocal;
int nall = nlocal;
if (GHOST)
@ -32,7 +120,11 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
data(*list,
k_cutneighsq.view<DeviceType>(),
k_bincount.view<DeviceType>(),
k_bins.view<DeviceType>(),nlocal,
k_bins.view<DeviceType>(),
nstencil,
k_stencil.view<DeviceType>(),
k_stencilxyz.view<DeviceType>(),
nlocal,
atomKK->k_x.view<DeviceType>(),
atomKK->k_type.view<DeviceType>(),
atomKK->k_mask.view<DeviceType>(),
@ -43,16 +135,16 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
atomKK->molecular,
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
bininvx,bininvy,bininvz,
exclude, nex_type,maxex_type,
exclude, nex_type,
k_ex1_type.view<DeviceType>(),
k_ex2_type.view<DeviceType>(),
k_ex_type.view<DeviceType>(),
nex_group,maxex_group,
nex_group,
k_ex1_group.view<DeviceType>(),
k_ex2_group.view<DeviceType>(),
k_ex1_bit.view<DeviceType>(),
k_ex2_bit.view<DeviceType>(),
nex_mol, maxex_mol,
nex_mol,
k_ex_mol_group.view<DeviceType>(),
k_ex_mol_bit.view<DeviceType>(),
bboxhi,bboxlo,
@ -69,40 +161,15 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
k_ex2_bit.sync<DeviceType>();
k_ex_mol_group.sync<DeviceType>();
k_ex_mol_bit.sync<DeviceType>();
k_bincount.sync<DeviceType>(),
k_bins.sync<DeviceType>(),
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
if (GHOST)
Kokkos::deep_copy(list->d_stencilxyz,list->h_stencilxyz);
data.special_flag[0] = special_flag[0];
data.special_flag[1] = special_flag[1];
data.special_flag[2] = special_flag[2];
data.special_flag[3] = special_flag[3];
while(data.h_resize() > 0) {
data.h_resize() = 0;
deep_copy(data.resize, data.h_resize);
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
data.bins = k_bins.view<DeviceType>();
data.c_bins = data.bins;
}
}
if(list->d_neighbors.dimension_0()<nall) {
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
@ -125,18 +192,18 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
#endif
if (GHOST) {
NeighborKokkosBuildFunctorGhost<DeviceType,HALF_NEIGH> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
NPairKokkosBuildFunctorGhost<DeviceType,HALF_NEIGH> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
Kokkos::parallel_for(nall, f);
} else {
if(newton_pair) {
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
if (newton_pair) {
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
Kokkos::parallel_for(config, f);
#else
Kokkos::parallel_for(nall, f);
#endif
} else {
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
Kokkos::parallel_for(config, f);
#else
@ -169,24 +236,9 @@ if (GHOST) {
/* ---------------------------------------------------------------------- */
template<class Device>
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void NeighborKokkosExecute<Device>::binatomsItem(const int &i) const
{
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
if(ac < bins.dimension_1()) {
bins(ibin, ac) = i;
} else {
resize() = 1;
}
}
/* ---------------------------------------------------------------------- */
template<class Device>
KOKKOS_INLINE_FUNCTION
int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) const
int NeighborKokkosExecute<DeviceType>::find_special(const int &i, const int &j) const
{
const int n1 = nspecial(i,0);
const int n2 = nspecial(i,1);
@ -214,9 +266,9 @@ int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) cons
/* ---------------------------------------------------------------------- */
template<class Device>
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
int NeighborKokkosExecute<DeviceType>::exclusion(const int &i,const int &j,
const int &itype,const int &jtype) const
{
int m;
@ -241,8 +293,8 @@ int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
/* ---------------------------------------------------------------------- */
template<class Device> template<int HalfNeigh,int GhostNewton>
void NeighborKokkosExecute<Device>::
template<class DeviceType> template<int HalfNeigh,int Newton>
void NeighborKokkosExecute<DeviceType>::
build_Item(const int &i) const
{
/* if necessary, goto next page and add pages */
@ -261,9 +313,8 @@ void NeighborKokkosExecute<Device>::
const int ibin = coord2bin(xtmp, ytmp, ztmp);
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
// loop over all bins in neighborhood (includes ibin)
if(HalfNeigh)
@ -272,8 +323,8 @@ void NeighborKokkosExecute<Device>::
const int jtype = type(j);
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
if((j == i) || (HalfNeigh && !GhostNewton && (j < i)) ||
(HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) &&
if((j == i) || (HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
@ -312,14 +363,16 @@ void NeighborKokkosExecute<Device>::
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
// get subview of jbin
if(HalfNeigh&&(ibin==jbin)) continue;
//const ArrayTypes<Device>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
//const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
const int jtype = type(j);
if(HalfNeigh && !GhostNewton && (j < i)) continue;
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
@ -331,7 +384,7 @@ void NeighborKokkosExecute<Device>::
if(rsq <= cutneighsq(itype,jtype)) {
if (molecular) {
if (!moltemplate)
which = find_special(i,j);
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
@ -364,15 +417,18 @@ void NeighborKokkosExecute<Device>::
if(n >= new_maxneighs()) new_maxneighs() = n;
}
neigh_list.d_ilist(i) = i;
}
/* ---------------------------------------------------------------------- */
#ifdef KOKKOS_HAVE_CUDA
extern __shared__ X_FLOAT sharedmem[];
/* ---------------------------------------------------------------------- */
template<class DeviceType> template<int HalfNeigh,int GhostNewton>
template<class DeviceType> template<int HalfNeigh,int Newton>
__device__ inline
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
{
@ -429,8 +485,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
if((j == i) ||
(HalfNeigh && !GhostNewton && (j < i)) ||
(HalfNeigh && GhostNewton &&
(HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton &&
((j < i) ||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
@ -445,7 +501,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
if (molecular) {
int which = 0;
if (!moltemplate)
which = find_special(i,j);
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
@ -472,9 +528,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
}
__syncthreads();
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
= d_stencil;
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
@ -501,7 +556,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
const int jtype = other_x[m + 3 * atoms_per_bin];
//if(HalfNeigh && (j < i)) continue;
if(HalfNeigh && !GhostNewton && (j < i)) continue;
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
@ -514,7 +569,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
if (molecular) {
int which = 0;
if (!moltemplate)
which = find_special(i,j);
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
@ -558,8 +613,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
/* ---------------------------------------------------------------------- */
template<class Device> template<int HalfNeigh>
void NeighborKokkosExecute<Device>::
template<class DeviceType> template<int HalfNeigh>
void NeighborKokkosExecute<DeviceType>::
build_Item_Ghost(const int &i) const
{
/* if necessary, goto next page and add pages */
@ -576,11 +631,10 @@ void NeighborKokkosExecute<Device>::
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
const typename ArrayTypes<Device>::t_int_1d_3_const_um stencilxyz
= neigh_list.d_stencilxyz;
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
const typename ArrayTypes<DeviceType>::t_int_1d_3_const_um stencilxyz
= d_stencilxyz;
// loop over all atoms in surrounding bins in stencil including self
// when i is a ghost atom, must check if stencil bin is out of bounds
@ -679,197 +733,17 @@ void NeighborKokkosExecute<Device>::
neigh_list.d_ilist(i) = i;
}
template<class DeviceType>
void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
{
const int nall = includegroup?atom->nfirst:atom->nlocal;
list->grow(nall);
}
NeighborKokkosExecute<DeviceType>
data(*list,
k_cutneighsq.view<DeviceType>(),
k_bincount.view<DeviceType>(),
k_bins.view<DeviceType>(),nall,
atomKK->k_x.view<DeviceType>(),
atomKK->k_type.view<DeviceType>(),
atomKK->k_mask.view<DeviceType>(),
atomKK->k_molecule.view<DeviceType>(),
atomKK->k_tag.view<DeviceType>(),
atomKK->k_special.view<DeviceType>(),
atomKK->k_nspecial.view<DeviceType>(),
atomKK->molecular,
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
bininvx,bininvy,bininvz,
exclude, nex_type,maxex_type,
k_ex1_type.view<DeviceType>(),
k_ex2_type.view<DeviceType>(),
k_ex_type.view<DeviceType>(),
nex_group,maxex_group,
k_ex1_group.view<DeviceType>(),
k_ex2_group.view<DeviceType>(),
k_ex1_bit.view<DeviceType>(),
k_ex2_bit.view<DeviceType>(),
nex_mol, maxex_mol,
k_ex_mol_group.view<DeviceType>(),
k_ex_mol_bit.view<DeviceType>(),
bboxhi,bboxlo,
domain->xperiodic,domain->yperiodic,domain->zperiodic,
domain->xprd_half,domain->yprd_half,domain->zprd_half);
k_cutneighsq.sync<DeviceType>();
k_ex1_type.sync<DeviceType>();
k_ex2_type.sync<DeviceType>();
k_ex_type.sync<DeviceType>();
k_ex1_group.sync<DeviceType>();
k_ex2_group.sync<DeviceType>();
k_ex1_bit.sync<DeviceType>();
k_ex2_bit.sync<DeviceType>();
k_ex_mol_group.sync<DeviceType>();
k_ex_mol_bit.sync<DeviceType>();
data.special_flag[0] = special_flag[0];
data.special_flag[1] = special_flag[1];
data.special_flag[2] = special_flag[2];
data.special_flag[3] = special_flag[3];
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
DeviceType::fence();
while(data.h_resize() > 0) {
data.h_resize() = 0;
deep_copy(data.resize, data.h_resize);
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
data.bins = k_bins.view<DeviceType>();
data.c_bins = data.bins;
}
}
if(list->d_neighbors.dimension_0()<nall) {
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.d_numneigh = list->d_numneigh;
}
data.h_resize()=1;
while(data.h_resize()) {
data.h_new_maxneighs() = list->maxneighs;
data.h_resize() = 0;
Kokkos::deep_copy(data.resize, data.h_resize);
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
namespace LAMMPS_NS {
template class NPairKokkos<LMPDeviceType,0,0>;
template class NPairKokkos<LMPDeviceType,0,1>;
template class NPairKokkos<LMPDeviceType,1,0>;
template class NPairKokkos<LMPDeviceType,1,1>;
#ifdef KOKKOS_HAVE_CUDA
#define BINS_PER_BLOCK 2
const int factor = atoms_per_bin<64?2:1;
Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor);
#else
const int factor = 1;
template class NPairKokkos<LMPHostType,0,0>;
template class NPairKokkos<LMPHostType,0,1>;
template class NPairKokkos<LMPHostType,1,0>;
template class NPairKokkos<LMPHostType,1,1>;
#endif
if(newton_pair) {
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
//#ifdef KOKKOS_HAVE_CUDA
// Kokkos::parallel_for(config, f);
//#else
Kokkos::parallel_for(nall, f);
//#endif
} else {
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
//#ifdef KOKKOS_HAVE_CUDA
// Kokkos::parallel_for(config, f);
//#else
Kokkos::parallel_for(nall, f);
//#endif
}
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
list->maxneighs = data.h_new_maxneighs() * 1.2;
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.maxneighs = list->maxneighs;
}
}
list->inum = nall;
list->gnum = 0;
}
/* ---------------------------------------------------------------------- */
template<class Device> template<int ClusterSize>
void NeighborKokkosExecute<Device>::
build_cluster_Item(const int &i) const
{
/* if necessary, goto next page and add pages */
int n = 0;
// get subview of neighbors of i
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
const X_FLOAT xtmp = x(i, 0);
const X_FLOAT ytmp = x(i, 1);
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
const int ibin = coord2bin(xtmp, ytmp, ztmp);
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
bool skip = i == j;
for(int k = 0; k< (n<neigh_list.maxneighs?n:neigh_list.maxneighs); k++)
if((j-(j%ClusterSize)) == neighbors_i(k)) {skip=true;};//{m += ClusterSize - j&(ClusterSize-1)-1; skip=true;}
if(!skip) {
const int jtype = type(j);
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if(n<neigh_list.maxneighs) neighbors_i(n) = (j-(j%ClusterSize));
n++;
//m += ClusterSize - j&(ClusterSize-1)-1;
}
}
}
}
neigh_list.d_numneigh(i) = n;
if(n >= neigh_list.maxneighs) {
resize() = 1;
if(n >= new_maxneighs()) new_maxneighs() = n;
}
neigh_list.d_ilist(i) = i;
}
}

424
src/KOKKOS/npair_kokkos.h Normal file
View File

@ -0,0 +1,424 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
typedef NPairKokkos<LMPHostType,0,0> NPairKokkosFullBinHost;
NPairStyle(full/bin/kk/host,
NPairKokkosFullBinHost,
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,0,0> NPairKokkosFullBinDevice;
NPairStyle(full/bin/kk/device,
NPairKokkosFullBinDevice,
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,0,1> NPairKokkosFullBinGhostHost;
NPairStyle(full/bin/ghost/kk/host,
NPairKokkosFullBinGhostHost,
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,0,1> NPairKokkosFullBinGhostDevice;
NPairStyle(full/bin/ghost/kk/device,
NPairKokkosFullBinGhostDevice,
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,1,0> NPairKokkosHalfBinHost;
NPairStyle(half/bin/kk/host,
NPairKokkosHalfBinHost,
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,1,0> NPairKokkosHalfBinDevice;
NPairStyle(half/bin/kk/device,
NPairKokkosHalfBinDevice,
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,1,1> NPairKokkosHalfBinGhostHost;
NPairStyle(half/bin/ghost/kk/host,
NPairKokkosHalfBinGhostHost,
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,1,1> NPairKokkosHalfBinGhostDevice;
NPairStyle(half/bin/ghost/kk/device,
NPairKokkosHalfBinGhostDevice,
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
#else
#ifndef LMP_NPAIR_KOKKOS_H
#define LMP_NPAIR_KOKKOS_H
#include "npair.h"
#include "neigh_list_kokkos.h"
namespace LAMMPS_NS {
template<class DeviceType, int HALF_NEIGH, int GHOST>
class NPairKokkos : public NPair {
public:
NPairKokkos(class LAMMPS *);
~NPairKokkos() {}
void copy_neighbor_info();
void copy_bin_info();
void copy_stencil_info();
void build(class NeighList *);
private:
int newton_pair;
// data from Neighbor class
DAT::tdual_xfloat_2d k_cutneighsq;
// exclusion data from Neighbor class
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
DAT::tdual_int_2d k_ex_type;
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
DAT::tdual_int_1d k_ex_mol_group;
DAT::tdual_int_1d k_ex_mol_bit;
// data from NBin class
int atoms_per_bin;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
// data from NStencil class
int nstencil;
DAT::tdual_int_1d k_stencil; // # of J neighs for each I
DAT::tdual_int_1d_3 k_stencilxyz;
};
template<class DeviceType>
class NeighborKokkosExecute
{
typedef ArrayTypes<DeviceType> AT;
public:
NeighListKokkos<DeviceType> neigh_list;
// data from Neighbor class
const typename AT::t_xfloat_2d_randomread cutneighsq;
// exclusion data from Neighbor class
const int exclude;
const int nex_type;
const typename AT::t_int_1d_const ex1_type,ex2_type;
const typename AT::t_int_2d_const ex_type;
const int nex_group;
const typename AT::t_int_1d_const ex1_group,ex2_group;
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
const int nex_mol;
const typename AT::t_int_1d_const ex_mol_group;
const typename AT::t_int_1d_const ex_mol_bit;
// data from NBin class
const typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
// data from NStencil class
int nstencil;
typename AT::t_int_1d d_stencil; // # of J neighs for each I
typename AT::t_int_1d_3 d_stencilxyz;
// data from Atom class
const typename AT::t_x_array_randomread x;
const typename AT::t_int_1d_const type,mask,molecule;
const typename AT::t_tagint_1d_const tag;
const typename AT::t_tagint_2d_const special;
const typename AT::t_int_2d_const nspecial;
const int molecular;
int moltemplate;
int special_flag[4];
const int nbinx,nbiny,nbinz;
const int mbinx,mbiny,mbinz;
const int mbinxlo,mbinylo,mbinzlo;
const X_FLOAT bininvx,bininvy,bininvz;
X_FLOAT bboxhi[3],bboxlo[3];
const int nlocal;
typename AT::t_int_scalar resize;
typename AT::t_int_scalar new_maxneighs;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
const int xperiodic, yperiodic, zperiodic;
const int xprd_half, yprd_half, zprd_half;
NeighborKokkosExecute(
const NeighListKokkos<DeviceType> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
const typename AT::t_int_1d &_bincount,
const typename AT::t_int_2d &_bins,
const int _nstencil,
const typename AT::t_int_1d &_d_stencil,
const typename AT::t_int_1d_3 &_d_stencilxyz,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_int_1d_const &_molecule,
const typename AT::t_tagint_1d_const &_tag,
const typename AT::t_tagint_2d_const &_special,
const typename AT::t_int_2d_const &_nspecial,
const int &_molecular,
const int & _nbinx,const int & _nbiny,const int & _nbinz,
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const int & _exclude,const int & _nex_type,
const typename AT::t_int_1d_const & _ex1_type,
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
const int & _nex_group,
const typename AT::t_int_1d_const & _ex1_group,
const typename AT::t_int_1d_const & _ex2_group,
const typename AT::t_int_1d_const & _ex1_bit,
const typename AT::t_int_1d_const & _ex2_bit,
const int & _nex_mol,
const typename AT::t_int_1d_const & _ex_mol_group,
const typename AT::t_int_1d_const & _ex_mol_bit,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),
nlocal(_nlocal),
x(_x),type(_type),mask(_mask),molecule(_molecule),
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
exclude(_exclude),nex_type(_nex_type),
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
nex_group(_nex_group),
ex1_group(_ex1_group),ex2_group(_ex2_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) {
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(resize);
#else
h_resize = resize;
#endif
h_resize() = 1;
new_maxneighs = typename AT::
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
#ifndef KOKKOS_USE_CUDA_UVM
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
#else
h_new_maxneighs = new_maxneighs;
#endif
h_new_maxneighs() = neigh_list.maxneighs;
};
~NeighborKokkosExecute() {neigh_list.clean_copy();};
template<int HalfNeigh, int Newton>
KOKKOS_FUNCTION
void build_Item(const int &i) const;
template<int HalfNeigh>
KOKKOS_FUNCTION
void build_Item_Ghost(const int &i) const;
#ifdef KOKKOS_HAVE_CUDA
template<int HalfNeigh, int Newton>
__device__ inline
void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
#endif
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
KOKKOS_INLINE_FUNCTION
int find_special(const int &i, const int &j) const;
KOKKOS_INLINE_FUNCTION
int minimum_image_check(double dx, double dy, double dz) const {
if (xperiodic && fabs(dx) > xprd_half) return 1;
if (yperiodic && fabs(dy) > yprd_half) return 1;
if (zperiodic && fabs(dz) > zprd_half) return 1;
return 0;
}
};
template<class DeviceType,int HALF_NEIGH,int GHOST_NEWTON>
struct NPairKokkosBuildFunctor {
typedef DeviceType device_type;
const NeighborKokkosExecute<DeviceType> c;
const size_t sharedsize;
NPairKokkosBuildFunctor(const NeighborKokkosExecute<DeviceType> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
#ifdef KOKKOS_HAVE_CUDA
__device__ inline
void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
}
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
#endif
};
template<int HALF_NEIGH,int GHOST_NEWTON>
struct NPairKokkosBuildFunctor<LMPHostType,HALF_NEIGH,GHOST_NEWTON> {
typedef LMPHostType device_type;
const NeighborKokkosExecute<LMPHostType> c;
const size_t sharedsize;
NPairKokkosBuildFunctor(const NeighborKokkosExecute<LMPHostType> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
void operator() (typename Kokkos::TeamPolicy<LMPHostType>::member_type dev) const {}
};
template<class DeviceType,int HALF_NEIGH>
struct NPairKokkosBuildFunctorGhost {
typedef DeviceType device_type;
const NeighborKokkosExecute<DeviceType> c;
const size_t sharedsize;
NPairKokkosBuildFunctorGhost(const NeighborKokkosExecute<DeviceType> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item_Ghost<HALF_NEIGH>(i);
}
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -90,7 +90,7 @@ void PairBuckCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -309,19 +309,12 @@ void PairBuckCoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 1;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/cut/kk");
}

View File

@ -109,7 +109,7 @@ void PairBuckCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -458,11 +458,9 @@ void PairBuckCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk");
}

View File

@ -79,7 +79,7 @@ void PairBuckKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -233,19 +233,12 @@ void PairBuckKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairBuckKokkos : public PairBuck {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairBuckKokkos(class LAMMPS *);
@ -96,17 +96,14 @@ class PairBuckKokkos : public PairBuck {
friend class PairComputeFunctor<PairBuckKokkos,HALF,true>;
friend class PairComputeFunctor<PairBuckKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairBuckKokkos,N2,true>;
friend class PairComputeFunctor<PairBuckKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairBuckKokkos,FULL,false>;
friend class PairComputeFunctor<PairBuckKokkos,HALF,false>;
friend class PairComputeFunctor<PairBuckKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairBuckKokkos,N2,false>;
friend class PairComputeFunctor<PairBuckKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALF,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALFTHREAD,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,N2,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairBuckKokkos>(PairBuckKokkos*);
};

View File

@ -78,7 +78,7 @@ void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -215,11 +215,9 @@ void PairCoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/cut/kk");
}

View File

@ -85,7 +85,7 @@ void PairCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -257,19 +257,12 @@ void PairCoulDebyeKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/debye/kk");
}

View File

@ -221,11 +221,9 @@ void PairCoulDSFKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/dsf/kk");
}

View File

@ -102,7 +102,7 @@ void PairCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -408,11 +408,9 @@ void PairCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk");
}

View File

@ -222,11 +222,9 @@ void PairCoulWolfKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/wolf/kk");
}

View File

@ -286,11 +286,9 @@ void PairEAMAlloyKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/alloy");
}

View File

@ -291,11 +291,9 @@ void PairEAMFSKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/fs");
}

View File

@ -281,11 +281,9 @@ void PairEAMKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk");
}

View File

@ -333,145 +333,6 @@ struct PairComputeFunctor {
}
};
template <class PairStyle, bool STACKPARAMS, class Specialisation>
struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
typedef typename PairStyle::device_type device_type ;
typedef EV_FLOAT value_type;
PairStyle c;
NeighListKokkos<device_type> list;
PairComputeFunctor(PairStyle* c_ptr,
NeighListKokkos<device_type>* list_ptr):
c(*c_ptr),list(*list_ptr) {};
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
return j >> SBBITS & 3;
}
template<int EVFLAG, int NEWTON_PAIR>
KOKKOS_FUNCTION
EV_FLOAT compute_item(const typename Kokkos::TeamPolicy<device_type>::member_type& dev,
const NeighListKokkos<device_type> &list, const NoCoulTag& ) const {
EV_FLOAT ev;
int i = dev.league_rank()*dev.team_size() + dev.team_rank();
const X_FLOAT xtmp = c.c_x(i,0);
const X_FLOAT ytmp = c.c_x(i,1);
const X_FLOAT ztmp = c.c_x(i,2);
int itype = c.type(i);
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
F_FLOAT3 ftmp;
for (int jj = 0; jj < jnum; jj++) {
int jjj = neighbors_i(jj);
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(dev,NeighClusterSize),[&] (const int& k, F_FLOAT3& fftmp) {
const F_FLOAT factor_lj = c.special_lj[sbmask(jjj+k)];
const int j = (jjj + k)&NEIGHMASK;
if((j==i)||(j>=c.nall)) return;
const X_FLOAT delx = xtmp - c.c_x(j,0);
const X_FLOAT dely = ytmp - c.c_x(j,1);
const X_FLOAT delz = ztmp - c.c_x(j,2);
const int jtype = c.type(j);
const F_FLOAT rsq = (delx*delx + dely*dely + delz*delz);
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
fftmp.x += delx*fpair;
fftmp.y += dely*fpair;
fftmp.z += delz*fpair;
if (EVFLAG) {
F_FLOAT evdwl = 0.0;
if (c.eflag) {
evdwl = 0.5*
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
ev.evdwl += evdwl;
}
if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz);
}
}
},ftmp);
}
Kokkos::single(Kokkos::PerThread(dev), [&]() {
c.f(i,0) += ftmp.x;
c.f(i,1) += ftmp.y;
c.f(i,2) += ftmp.z;
});
return ev;
}
KOKKOS_INLINE_FUNCTION
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const
{
const int EFLAG = c.eflag;
const int NEWTON_PAIR = c.newton_pair;
const int VFLAG = c.vflag_either;
if (EFLAG) {
if (c.eflag_atom) {
const E_FLOAT epairhalf = 0.5 * epair;
if (NEWTON_PAIR || i < c.nlocal) c.d_eatom[i] += epairhalf;
if (NEWTON_PAIR || j < c.nlocal) c.d_eatom[j] += epairhalf;
}
}
if (VFLAG) {
const E_FLOAT v0 = delx*delx*fpair;
const E_FLOAT v1 = dely*dely*fpair;
const E_FLOAT v2 = delz*delz*fpair;
const E_FLOAT v3 = delx*dely*fpair;
const E_FLOAT v4 = delx*delz*fpair;
const E_FLOAT v5 = dely*delz*fpair;
if (c.vflag_global) {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
if (c.vflag_atom) {
if (i < c.nlocal) {
c.d_vatom(i,0) += 0.5*v0;
c.d_vatom(i,1) += 0.5*v1;
c.d_vatom(i,2) += 0.5*v2;
c.d_vatom(i,3) += 0.5*v3;
c.d_vatom(i,4) += 0.5*v4;
c.d_vatom(i,5) += 0.5*v5;
}
}
}
}
KOKKOS_INLINE_FUNCTION
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev) const {
if (c.newton_pair) compute_item<0,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
else compute_item<0,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
KOKKOS_INLINE_FUNCTION
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev, value_type &energy_virial) const {
if (c.newton_pair)
energy_virial += compute_item<1,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
else
energy_virial += compute_item<1,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
};
template <class PairStyle, bool STACKPARAMS, class Specialisation>
struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
typedef typename PairStyle::device_type device_type ;
@ -607,8 +468,8 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
// The enable_if clause will invalidate the last parameter of the function, so that
// a match is only achieved, if PairStyle supports the specific neighborlist variant.
// This uses the fact that failure to match template parameters is not an error.
// By having the enable_if with a ! and without it, exactly one of the two versions of the functions
// pair_compute_neighlist and pair_compute_fullcluster will match - either the dummy version
// By having the enable_if with a ! and without it, exactly one of the functions
// pair_compute_neighlist will match - either the dummy version
// or the real one further below.
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<!((NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
@ -619,15 +480,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
return ev;
}
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<!((FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
EV_FLOAT ev;
(void) fpair;
(void) list;
printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i \n",FULLCLUSTER,PairStyle::EnabledNeighFlags);
return ev;
}
// Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
@ -644,41 +496,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
return ev;
}
// Submit ParallelFor for NEIGHFLAG=FULLCLUSTER
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<(FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
EV_FLOAT ev;
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
else Kokkos::parallel_for(config,ff);
} else {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
else Kokkos::parallel_for(config,ff);
}
return ev;
}
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
EV_FLOAT ev;
@ -690,8 +507,6 @@ EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::dev
ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list);
} else if (fpair->neighflag == N2) {
ev = pair_compute_neighlist<PairStyle,N2,Specialisation> (fpair,list);
} else if (fpair->neighflag == FULLCLUSTER) {
ev = pair_compute_fullcluster<PairStyle,Specialisation> (fpair,list);
}
return ev;
}

View File

@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::compute(int eflag_in, int
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -455,11 +455,9 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/implicit/kk");
}

View File

@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_i
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -456,11 +456,9 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/kk");
}

View File

@ -110,7 +110,7 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -486,11 +486,9 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/long/kk");
}

View File

@ -87,7 +87,7 @@ void PairLJClass2CoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -289,19 +289,12 @@ void PairLJClass2CoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/cut/kk");
}

View File

@ -95,7 +95,7 @@ void PairLJClass2CoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -445,11 +445,9 @@ void PairLJClass2CoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/long/kk");
}

View File

@ -87,7 +87,7 @@ void PairLJClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -227,19 +227,12 @@ void PairLJClass2Kokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJClass2Kokkos : public PairLJClass2 {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJClass2Kokkos(class LAMMPS *);
@ -99,17 +99,14 @@ class PairLJClass2Kokkos : public PairLJClass2 {
friend class PairComputeFunctor<PairLJClass2Kokkos,HALF,true>;
friend class PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJClass2Kokkos,N2,true>;
friend class PairComputeFunctor<PairLJClass2Kokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJClass2Kokkos,FULL,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,HALF,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,N2,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALF,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALFTHREAD,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,N2,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJClass2Kokkos>(PairLJClass2Kokkos*);
};

View File

@ -87,7 +87,7 @@ void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -280,19 +280,12 @@ void PairLJCutCoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
}

View File

@ -91,7 +91,7 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -310,19 +310,12 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/debye/kk");
}

View File

@ -99,7 +99,7 @@ void PairLJCutCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -301,19 +301,12 @@ void PairLJCutCoulDSFKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
}

View File

@ -99,7 +99,7 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -464,11 +464,9 @@ void PairLJCutCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/long/kk");
}

View File

@ -87,7 +87,7 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -245,19 +245,12 @@ void PairLJCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJCutKokkos : public PairLJCut {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJCutKokkos(class LAMMPS *);
@ -99,17 +99,14 @@ class PairLJCutKokkos : public PairLJCut {
friend class PairComputeFunctor<PairLJCutKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJCutKokkos,N2,true>;
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJCutKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJCutKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJCutKokkos,N2,false>;
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,N2,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJCutKokkos>(PairLJCutKokkos*);
};

View File

@ -86,7 +86,7 @@ void PairLJExpandKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -230,19 +230,12 @@ void PairLJExpandKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/expand/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJExpandKokkos : public PairLJExpand {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJExpandKokkos(class LAMMPS *);
@ -100,17 +100,14 @@ class PairLJExpandKokkos : public PairLJExpand {
friend class PairComputeFunctor<PairLJExpandKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJExpandKokkos,N2,true>;
friend class PairComputeFunctor<PairLJExpandKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJExpandKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,N2,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,FULL,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALF,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALFTHREAD,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,N2,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJExpandKokkos>(PairLJExpandKokkos*);
};

View File

@ -101,7 +101,7 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -439,11 +439,9 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/coul/gromacs/kk");
}

View File

@ -98,7 +98,7 @@ void PairLJGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -277,11 +277,9 @@ void PairLJGromacsKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/kk");
}

View File

@ -86,7 +86,7 @@ void PairLJSDKKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -258,19 +258,12 @@ void PairLJSDKKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/sdk/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJSDKKokkos : public PairLJSDK {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJSDKKokkos(class LAMMPS *);
@ -97,17 +97,14 @@ class PairLJSDKKokkos : public PairLJSDK {
friend class PairComputeFunctor<PairLJSDKKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJSDKKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJSDKKokkos,N2,true>;
friend class PairComputeFunctor<PairLJSDKKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJSDKKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,N2,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,FULL,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,HALF,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,HALFTHREAD,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,N2,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJSDKKokkos,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJSDKKokkos,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJSDKKokkos>(PairLJSDKKokkos*);
};

View File

@ -146,12 +146,10 @@ void PairReaxCKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk");

View File

@ -601,7 +601,6 @@ void PairSWKokkos<DeviceType>::init_style()
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -96,7 +96,7 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -142,19 +142,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
f(this,(NeighListKokkos<DeviceType>*) list);
if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
else Kokkos::parallel_for(nlocal,f);
} else if (neighflag == FULLCLUSTER) {
typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,false,S_TableCompute<DeviceType,TABSTYLE> >
f_type;
f_type f(this,(NeighListKokkos<DeviceType>*) list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
else Kokkos::parallel_for(config,f);
}
} else {
if (neighflag == FULL) {
@ -177,19 +164,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
f(this,(NeighListKokkos<DeviceType>*) list);
if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
else Kokkos::parallel_for(nlocal,f);
} else if (neighflag == FULLCLUSTER) {
typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,true,S_TableCompute<DeviceType,TABSTYLE> >
f_type;
f_type f(this,(NeighListKokkos<DeviceType>*) list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
else Kokkos::parallel_for(config,f);
}
}
@ -1261,19 +1235,12 @@ void PairTableKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
}

View File

@ -41,7 +41,7 @@ template<class DeviceType>
class PairTableKokkos : public Pair {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
@ -170,45 +170,37 @@ class PairTableKokkos : public Pair {
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >;
friend void pair_virial_fdotr_compute<PairTableKokkos>(PairTableKokkos*);
};

View File

@ -103,7 +103,6 @@ void PairTersoffKokkos<DeviceType>::init_style()
//if (neighflag == FULL || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -102,7 +102,6 @@ void PairTersoffMODKokkos<DeviceType>::init_style()
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -113,7 +113,6 @@ void PairTersoffZBLKokkos<DeviceType>::init_style()
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -59,8 +59,9 @@ style () {
# called by "make machine"
# col 1 = string to search for
# col 2 = search in *.h files starting with this name
# col 3 = prefix of style file
# col 4
# col 3 = name of style file
# col 4 = file that includes the style file
# col 5 = optional 2nd file that includes the style file
if (test $1 = "style") then
@ -69,7 +70,7 @@ if (test $1 = "style") then
style BODY_CLASS body_ body atom_vec_body
style BOND_CLASS bond_ bond force
style COMMAND_CLASS "" command input
style COMPUTE_CLASS compute_ compute modify modify_cuda
style COMPUTE_CLASS compute_ compute modify
style DIHEDRAL_CLASS dihedral_ dihedral force
style DUMP_CLASS dump_ dump output write_dump
style FIX_CLASS fix_ fix modify
@ -77,6 +78,10 @@ if (test $1 = "style") then
style INTEGRATE_CLASS "" integrate update
style KSPACE_CLASS "" kspace force
style MINIMIZE_CLASS min_ minimize update
style NBIN_CLASS nbin_ nbin neighbor
style NPAIR_CLASS npair_ npair neighbor
style NSTENCIL_CLASS nstencil_ nstencil neighbor
style NTOPO_CLASS ntopo_ ntopo neighbor
style PAIR_CLASS pair_ pair force
style READER_CLASS reader_ reader read_dump
style REGION_CLASS region_ region domain

View File

@ -13,6 +13,40 @@ style_kspace.h
style_minimize.h
style_pair.h
style_region.h
style_neigh_bin.h
style_neigh_pair.h
style_neigh_stencil.h
# deleted on ## XXX 2016
accelerator_intel.h
neigh_bond.cpp
neigh_bond.h
neigh_derive.cpp
neigh_derive.h
neigh_full.cpp
neigh_full.h
neigh_gran.cpp
neigh_gran.h
neigh_half_bin.cpp
neigh_half_bin.h
neigh_half_multi.cpp
neigh_half_multi.h
neigh_half_nsq.cpp
neigh_half_nsq.h
neigh_respa.cpp
neigh_respa.h
neigh_shardlow.cpp
neigh_shardlow.h
neigh_stencil.cpp
neigh_half_bin_intel.cpp
neigh_full_kokkos.h
neighbor_omp.h
neigh_derive_omp.cpp
neigh_full_omp.cpp
neigh_gran_omp.cpp
neigh_half_bin_omp.cpp
neigh_half_multi_omp.cpp
neigh_half_nsq_omp.cpp
neigh_respa_omp.cpp
# deleted on 20 Sep 2016
fix_ti_rs.cpp
fix_ti_rs.h

View File

@ -47,6 +47,7 @@
#include "comm.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "random_mars.h"
#include "memory.h"
#include "domain.h"
@ -139,6 +140,23 @@ int FixShardlow::setmask()
/* ---------------------------------------------------------------------- */
void FixShardlow::init()
{
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->pair = 0;
neighbor->requests[irequest]->fix = 1;
neighbor->requests[irequest]->ssa = 1;
}
/* ---------------------------------------------------------------------- */
void FixShardlow::init_list(int id, NeighList *ptr)
{
list = ptr;
}
/* ---------------------------------------------------------------------- */
void FixShardlow::pre_exchange()
{
memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal);
@ -217,7 +235,6 @@ void FixShardlow::ssa_update(
int newton_pair = force->newton_pair;
double randPair;
int *ssaAIR = atom->ssaAIR;
double *uCond = atom->uCond;
double *uMech = atom->uMech;
double *dpdTheta = atom->dpdTheta;
@ -411,7 +428,6 @@ void FixShardlow::initial_integrate(int vflag)
int nghost = atom->nghost;
int airnum;
class NeighList *list; // points to list in pairDPD or pairDPDE
class RanMars *pRNG;
// NOTE: this logic is specific to orthogonal boxes, not triclinic
@ -432,12 +448,10 @@ void FixShardlow::initial_integrate(int vflag)
// Allocate memory for v_t0 to hold the initial velocities for the ghosts
v_t0 = (double (*)[3]) memory->smalloc(sizeof(double)*3*nghost, "FixShardlow:v_t0");
// Define pointers to access the neighbor list and RNG
// Define pointers to access the RNG
if(pairDPDE){
list = pairDPDE->list;
pRNG = pairDPDE->random;
} else {
list = pairDPD->list;
pRNG = pairDPD->random;
}
inum = list->inum;

View File

@ -26,9 +26,13 @@ namespace LAMMPS_NS {
class FixShardlow : public Fix {
public:
class NeighList *list; // The SSA specific neighbor list
FixShardlow(class LAMMPS *, int, char **);
~FixShardlow();
int setmask();
virtual void init();
virtual void init_list(int, class NeighList *);
virtual void setup(int);
virtual void initial_integrate(int);
void setup_pre_exchange();

129
src/USER-DPD/nbin_ssa.cpp Normal file
View File

@ -0,0 +1,129 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos (ARL) and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "nbin_ssa.h"
#include "atom.h"
#include "group.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NBinSSA::NBinSSA(LAMMPS *lmp) : NBinStandard(lmp)
{
maxbin_ssa = 0;
bins_ssa = NULL;
maxhead_ssa = 0;
binhead_ssa = NULL;
gbinhead_ssa = NULL;
}
NBinSSA::~NBinSSA()
{
memory->destroy(bins_ssa);
memory->destroy(binhead_ssa);
memory->destroy(gbinhead_ssa);
}
/* ----------------------------------------------------------------------
bin owned and ghost atoms for the Shardlow Splitting Algorithm (SSA)
local atoms are in distinct bins (binhead_ssa) from the ghosts
ghost atoms are in distinct bins (gbinhead_ssa) from the locals
ghosts which are not in an Active Interaction Region (AIR) are skipped
------------------------------------------------------------------------- */
void NBinSSA::bin_atoms()
{
int i,ibin;
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
if (includegroup) nlocal = atom->nfirst;
double **x = atom->x;
int *mask = atom->mask;
int *ssaAIR = atom->ssaAIR;
for (i = 0; i < mbins; i++) {
gbinhead_ssa[i] = -1;
binhead_ssa[i] = -1;
}
// bin in reverse order so linked list will be in forward order
if (includegroup) {
int bitmask = group->bitmask[includegroup];
int nowned = atom->nlocal; // NOTE: nlocal was set to atom->nfirst above
for (i = nall-1; i >= nowned; i--) {
if (ssaAIR[i] < 2) continue; // skip ghost atoms not in AIR
if (mask[i] & bitmask) {
ibin = coord2bin(x[i]);
bins_ssa[i] = gbinhead_ssa[ibin];
gbinhead_ssa[ibin] = i;
}
}
} else {
for (i = nall-1; i >= nlocal; i--) {
if (ssaAIR[i] < 2) continue; // skip ghost atoms not in AIR
ibin = coord2bin(x[i]);
bins_ssa[i] = gbinhead_ssa[ibin];
gbinhead_ssa[ibin] = i;
}
}
for (i = nlocal-1; i >= 0; i--) {
ibin = coord2bin(x[i]);
bins_ssa[i] = binhead_ssa[ibin];
binhead_ssa[ibin] = i;
}
}
/* ---------------------------------------------------------------------- */
void NBinSSA::bin_atoms_setup(int nall)
{
NBinStandard::bin_atoms_setup(nall); // Setup the parent class's data too
if (mbins > maxhead_ssa) {
maxhead_ssa = mbins;
memory->destroy(gbinhead_ssa);
memory->destroy(binhead_ssa);
memory->create(binhead_ssa,maxhead_ssa,"binhead_ssa");
memory->create(gbinhead_ssa,maxhead_ssa,"gbinhead_ssa");
}
if (nall > maxbin_ssa) {
maxbin_ssa = nall;
memory->destroy(bins_ssa);
memory->create(bins_ssa,maxbin_ssa,"bins_ssa");
}
}
/* ---------------------------------------------------------------------- */
bigint NBinSSA::memory_usage()
{
bigint bytes = NBinStandard::memory_usage(); // Count the parent's usage too
if (maxbin_ssa) bytes += memory->usage(bins_ssa,maxbin_ssa);
if (maxhead_ssa) {
bytes += memory->usage(binhead_ssa,maxhead_ssa);
bytes += memory->usage(gbinhead_ssa,maxhead_ssa);
}
return bytes;
}

54
src/USER-DPD/nbin_ssa.h Normal file
View File

@ -0,0 +1,54 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NBIN_CLASS
NBinStyle(ssa,
NBinSSA,
NB_SSA)
#else
#ifndef LMP_NBIN_SSA_H
#define LMP_NBIN_SSA_H
#include "nbin_standard.h"
namespace LAMMPS_NS {
class NBinSSA : public NBinStandard {
public:
int *bins_ssa; // index of next atom in each bin
int maxbin_ssa; // size of bins_ssa array
int *binhead_ssa; // index of 1st local atom in each bin
int *gbinhead_ssa; // index of 1st ghost atom in each bin
int maxhead_ssa; // size of binhead_ssa and gbinhead_ssa arrays
NBinSSA(class LAMMPS *);
~NBinSSA();
void bin_atoms_setup(int);
void bin_atoms();
bigint memory_usage();
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,260 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "npair_half_bin_newton_ssa.h"
#include "neighbor.h"
#include "nstencil_ssa.h"
#include "nbin_ssa.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "domain.h"
#include "group.h"
#include "memory.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
// allocate space for static class variable
// prototype for non-class function
static int *ssaAIRptr;
static int cmp_ssaAIR(const void *, const void *);
/* ---------------------------------------------------------------------- */
NPairHalfBinNewtonSSA::NPairHalfBinNewtonSSA(LAMMPS *lmp) : NPair(lmp) {}
/* ----------------------------------------------------------------------
binned neighbor list construction with full Newton's 3rd law
for use by Shardlow Spliting Algorithm
each owned atom i checks its own bin and other bins in Newton stencil
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
void NPairHalfBinNewtonSSA::build(NeighList *list)
{
int i,j,k,n,itype,jtype,ibin,which,imol,iatom,moltemplate;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
if (includegroup) nlocal = atom->nfirst;
int *ssaAIR = atom->ssaAIR;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int molecular = atom->molecular;
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
MyPage<int> *ipage = list->ipage;
NStencilSSA *ns_ssa = dynamic_cast<NStencilSSA*>(ns);
if (!ns_ssa) error->one(FLERR, "NStencil wasn't a NStencilSSA object");
int nstencil_half = ns_ssa->nstencil_half;
int nstencil_full = ns_ssa->nstencil;
NBinSSA *nb_ssa = dynamic_cast<NBinSSA*>(nb);
if (!nb_ssa) error->one(FLERR, "NBin wasn't a NBinSSA object");
int *bins_ssa = nb_ssa->bins_ssa;
int *binhead_ssa = nb_ssa->binhead_ssa;
int *gbinhead_ssa = nb_ssa->gbinhead_ssa;
int inum = 0;
ipage->reset();
// loop over owned atoms, storing half of the neighbors
for (i = 0; i < nlocal; i++) {
int AIRct[8] = { 0 };
n = 0;
neighptr = ipage->vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over rest of local atoms in i's bin
// just store them, since j is beyond i in linked list
for (j = bins_ssa[i]; j >= 0; j = bins_ssa[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >= 0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
ibin = coord2bin(x[i]);
// loop over all local atoms in other bins in "half" stencil
for (k = 0; k < nstencil_half; k++) {
for (j = binhead_ssa[ibin+stencil[k]]; j >= 0;
j = bins_ssa[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >= 0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
AIRct[0] = n;
// loop over AIR ghost atoms in all bins in "full" stencil
// Note: the non-AIR ghost atoms have already been filtered out
// That is a significant time savings because of the "full" stencil
// Note2: only non-pure locals can have ghosts as neighbors
if (ssaAIR[i] == 1) for (k = 0; k < nstencil_full; k++) {
for (j = gbinhead_ssa[ibin+stencil[k]]; j >= 0;
j = bins_ssa[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >= 0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) {
neighptr[n++] = j;
++(AIRct[ssaAIR[j] - 1]);
} else if (domain->minimum_image_check(delx,dely,delz)) {
neighptr[n++] = j;
++(AIRct[ssaAIR[j] - 1]);
} else if (which > 0) {
neighptr[n++] = j ^ (which << SBBITS);
++(AIRct[ssaAIR[j] - 1]);
}
} else {
neighptr[n++] = j;
++(AIRct[ssaAIR[j] - 1]);
}
}
}
}
ilist[inum++] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage->vgot(n);
if (ipage->status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
// sort the ghosts in the neighbor list by their ssaAIR number
ssaAIRptr = atom->ssaAIR;
qsort(&(neighptr[AIRct[0]]), n - AIRct[0], sizeof(int), cmp_ssaAIR);
// do a prefix sum on the counts to turn them into indexes
list->ndxAIR_ssa[i][0] = AIRct[0];
for (int ndx = 1; ndx < 8; ++ndx) {
list->ndxAIR_ssa[i][ndx] = AIRct[ndx] + list->ndxAIR_ssa[i][ndx - 1];
}
}
list->inum = inum;
}
/* ----------------------------------------------------------------------
comparison function invoked by qsort()
accesses static class member ssaAIRptr, set before call to qsort()
------------------------------------------------------------------------- */
static int cmp_ssaAIR(const void *iptr, const void *jptr)
{
int i = *((int *) iptr);
int j = *((int *) jptr);
if (ssaAIRptr[i] < ssaAIRptr[j]) return -1;
if (ssaAIRptr[i] > ssaAIRptr[j]) return 1;
return 0;
}

View File

@ -0,0 +1,43 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(half/bin/newton/ssa,
NPairHalfBinNewtonSSA,
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA)
#else
#ifndef LMP_NPAIR_HALF_BIN_NEWTON_SSA_H
#define LMP_NPAIR_HALF_BIN_NEWTON_SSA_H
#include "npair.h"
namespace LAMMPS_NS {
class NPairHalfBinNewtonSSA : public NPair {
public:
NPairHalfBinNewtonSSA(class LAMMPS *);
~NPairHalfBinNewtonSSA() {}
void build(class NeighList *);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,132 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "npair_halffull_newton_ssa.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "domain.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
// allocate space for static class variable
// prototype for non-class function
static int *ssaAIRptr;
static int cmp_ssaAIR(const void *, const void *);
/* ---------------------------------------------------------------------- */
NPairHalffullNewtonSSA::NPairHalffullNewtonSSA(LAMMPS *lmp) : NPair(lmp) {}
/* ----------------------------------------------------------------------
build half list from full list for use by Shardlow Spliting Algorithm
pair stored once if i,j are both owned and i < j
if j is ghost, only store if j coords are "above and to the right" of i
works if full list is a skip list
------------------------------------------------------------------------- */
void NPairHalffullNewtonSSA::build(NeighList *list)
{
int i,j,ii,jj,n,jnum,joriginal;
int *neighptr,*jlist;
int nlocal = atom->nlocal;
int *ssaAIR = atom->ssaAIR;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
MyPage<int> *ipage = list->ipage;
int *ilist_full = list->listfull->ilist;
int *numneigh_full = list->listfull->numneigh;
int **firstneigh_full = list->listfull->firstneigh;
int inum_full = list->listfull->inum;
int inum = 0;
ipage->reset();
// loop over parent full list
for (ii = 0; ii < inum_full; ii++) {
int AIRct[8] = { 0 };
n = 0;
neighptr = ipage->vget();
i = ilist_full[ii];
// loop over full neighbor list
jlist = firstneigh_full[i];
jnum = numneigh_full[i];
for (jj = 0; jj < jnum; jj++) {
joriginal = jlist[jj];
j = joriginal & NEIGHMASK;
if (j < nlocal) {
if (i > j) continue;
++(AIRct[0]);
} else {
if (ssaAIR[j] < 2) continue; // skip ghost atoms not in AIR
++(AIRct[ssaAIR[j] - 1]);
}
neighptr[n++] = joriginal;
}
ilist[inum++] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage->vgot(n);
if (ipage->status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
// sort the locals+ghosts in the neighbor list by their ssaAIR number
ssaAIRptr = atom->ssaAIR;
qsort(&(neighptr[0]), n, sizeof(int), cmp_ssaAIR);
// do a prefix sum on the counts to turn them into indexes
list->ndxAIR_ssa[i][0] = AIRct[0];
for (int ndx = 1; ndx < 8; ++ndx) {
list->ndxAIR_ssa[i][ndx] = AIRct[ndx] + list->ndxAIR_ssa[i][ndx - 1];
}
}
list->inum = inum;
}
/* ----------------------------------------------------------------------
comparison function invoked by qsort()
accesses static class member ssaAIRptr, set before call to qsort()
------------------------------------------------------------------------- */
static int cmp_ssaAIR(const void *iptr, const void *jptr)
{
int i = *((int *) iptr);
int j = *((int *) jptr);
if (ssaAIRptr[i] < ssaAIRptr[j]) return -1;
if (ssaAIRptr[i] > ssaAIRptr[j]) return 1;
return 0;
}

View File

@ -0,0 +1,44 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(halffull/newton/ssa,
NPairHalffullNewtonSSA,
NP_HALFFULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON |
NP_ORTHO | NP_TRI | NP_SSA)
#else
#ifndef LMP_NPAIR_HALFFULL_NEWTON_SSA_H
#define LMP_NPAIR_HALFFULL_NEWTON_SSA_H
#include "npair.h"
namespace LAMMPS_NS {
class NPairHalffullNewtonSSA : public NPair {
public:
NPairHalffullNewtonSSA(class LAMMPS *);
~NPairHalffullNewtonSSA() {}
void build(class NeighList *);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,64 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "nstencil_half_bin_2d_newton_ssa.h"
#include "neighbor.h"
#include "neigh_list.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NStencilHalfBin2dNewtonSSA::NStencilHalfBin2dNewtonSSA(LAMMPS *lmp) :
NStencilSSA(lmp) {}
/* ----------------------------------------------------------------------
create stencil based on bin geometry and cutoff
stencil = bins whose closest corner to central bin is within cutoff
sx,sy,sz = bin bounds = furthest the stencil could possibly extend
3d creates xyz stencil, 2d creates xy stencil
for half list with newton on:
stencil is bins to the "upper right" of central bin
stencil does not include self
additionally, includes the bins beyond nstencil that are needed
to locate all the Active Interaction Region (AIR) ghosts for SSA
------------------------------------------------------------------------- */
void NStencilHalfBin2dNewtonSSA::create()
{
int i,j,pos = 0;
for (j = 0; j <= sy; j++)
for (i = -sx; i <= sx; i++)
if (j > 0 || (j == 0 && i > 0))
if (bin_distance(i,j,0) < cutneighmaxsq)
stencil[pos++] = j*mbinx + i;
nstencil_half = pos; // record where normal half stencil ends
// include additional bins for AIR ghosts only
for (j = -sy; j <= 0; j++)
for (i = -sx; i <= sx; i++) {
if (j == 0 && i > 0) continue;
if (bin_distance(i,j,0) < cutneighmaxsq)
stencil[pos++] = j*mbinx + i;
}
nstencil = pos; // record where full stencil ends
}

View File

@ -0,0 +1,43 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NSTENCIL_CLASS
NStencilStyle(half/bin/2d/newton/ssa,
NStencilHalfBin2dNewtonSSA,
NS_HALF | NS_BIN | NS_2D | NS_NEWTON | NS_SSA | NS_ORTHO)
#else
#ifndef LMP_NSTENCIL_HALF_BIN_2D_NEWTON_SSA_H
#define LMP_NSTENCIL_HALF_BIN_2D_NEWTON_SSA_H
#include "nstencil_ssa.h"
namespace LAMMPS_NS {
class NStencilHalfBin2dNewtonSSA : public NStencilSSA {
public:
NStencilHalfBin2dNewtonSSA(class LAMMPS *);
~NStencilHalfBin2dNewtonSSA() {}
void create();
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,74 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors:
James Larentzos and Timothy I. Mattox (Engility Corporation)
------------------------------------------------------------------------- */
#include "nstencil_half_bin_3d_newton_ssa.h"
#include "neighbor.h"
#include "neigh_list.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NStencilHalfBin3dNewtonSSA::NStencilHalfBin3dNewtonSSA(LAMMPS *lmp) :
NStencilSSA(lmp) {}
/* ----------------------------------------------------------------------
create stencil based on bin geometry and cutoff
stencil = bins whose closest corner to central bin is within cutoff
sx,sy,sz = bin bounds = furthest the stencil could possibly extend
3d creates xyz stencil, 2d creates xy stencil
for half list with newton on:
stencil is bins to the "upper right" of central bin
stencil does not include self
additionally, includes the bins beyond nstencil that are needed
to locate all the Active Interaction Region (AIR) ghosts for SSA
------------------------------------------------------------------------- */
void NStencilHalfBin3dNewtonSSA::create()
{
int i,j,k,pos = 0;
for (k = 0; k <= sz; k++)
for (j = -sy; j <= sy; j++)
for (i = -sx; i <= sx; i++)
if (k > 0 || j > 0 || (j == 0 && i > 0))
if (bin_distance(i,j,k) < cutneighmaxsq)
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
nstencil_half = pos; // record where normal half stencil ends
// include additional bins for AIR ghosts only
for (k = -sz; k < 0; k++)
for (j = -sy; j <= sy; j++)
for (i = -sx; i <= sx; i++)
if (bin_distance(i,j,k) < cutneighmaxsq)
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
// For k==0, make sure to skip already included bins
k = 0;
for (j = -sy; j <= 0; j++)
for (i = -sx; i <= sx; i++) {
if (j == 0 && i > 0) continue;
if (bin_distance(i,j,k) < cutneighmaxsq)
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
}
nstencil = pos; // record where full stencil ends
}

View File

@ -0,0 +1,43 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NSTENCIL_CLASS
NStencilStyle(half/bin/3d/newton/ssa,
NStencilHalfBin3dNewtonSSA,
NS_HALF | NS_BIN | NS_3D | NS_NEWTON | NS_SSA | NS_ORTHO)
#else
#ifndef LMP_NSTENCIL_HALF_BIN_3D_NEWTON_SSA_H
#define LMP_NSTENCIL_HALF_BIN_3D_NEWTON_SSA_H
#include "nstencil_ssa.h"
namespace LAMMPS_NS {
class NStencilHalfBin3dNewtonSSA : public NStencilSSA {
public:
NStencilHalfBin3dNewtonSSA(class LAMMPS *);
~NStencilHalfBin3dNewtonSSA() {}
void create();
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -11,12 +11,26 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_NSTENCIL_SSA_H
#define LMP_NSTENCIL_SSA_H
#include "nstencil.h"
namespace LAMMPS_NS {
class NStencilSSA : public NStencil {
public:
NStencilSSA(class LAMMPS *lmp) : NStencil(lmp) { }
~NStencilSSA() {}
virtual void create() = 0;
int nstencil_half; // where the half stencil ends
};
}
#endif
/* ERROR/WARNING messages:
E: Neighbor list overflow, boost neigh_modify one
There are too many neighbors of a single atom. Use the neigh_modify
command to increase the max number of neighbors allowed for one atom.
You may also want to boost the page size.
*/

View File

@ -320,11 +320,9 @@ void PairDPDfdt::init_style()
splitFDT_flag = false;
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->ssa = 0;
for (int i = 0; i < modify->nfix; i++)
if (strcmp(modify->fix[i]->style,"shardlow") == 0){
splitFDT_flag = true;
neighbor->requests[irequest]->ssa = 1;
}
}

View File

@ -408,11 +408,9 @@ void PairDPDfdtEnergy::init_style()
splitFDT_flag = false;
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->ssa = 0;
for (int i = 0; i < modify->nfix; i++)
if (strcmp(modify->fix[i]->style,"shardlow") == 0){
splitFDT_flag = true;
neighbor->requests[irequest]->ssa = 1;
}
bool eos_flag = false;

View File

@ -3,10 +3,6 @@
mode=$1
# enforce using portable C locale
LC_ALL=C
export LC_ALL
# arg1 = file, arg2 = file it depends on
action () {
@ -44,6 +40,10 @@ action intel_preprocess.h
action intel_buffers.h
action intel_buffers.cpp
action math_extra_intel.h
action nbin_intel.h
action nbin_intel.cpp
action npair_intel.h
action npair_intel.cpp
action intel_simd.h pair_sw_intel.cpp
action intel_intrinsics.h pair_tersoff_intel.cpp
action verlet_lrt_intel.h pppm.cpp
@ -58,18 +58,10 @@ if (test $mode = 1) then
sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_USER_INTEL |' ../Makefile.package
fi
# force rebuild of files with LMP_USER_INTEL switch
touch ../accelerator_intel.h
elif (test $mode = 0) then
if (test -e ../Makefile.package) then
sed -i -e 's/[^ \t]*INTEL[^ \t]* //' ../Makefile.package
fi
# force rebuild of files with LMP_USER_INTEL switch
touch ../accelerator_intel.h
fi

View File

@ -317,8 +317,6 @@ void FixIntel::init()
error->all(FLERR,
"Currently, cannot use more than one intel style with hybrid.");
neighbor->fix_intel = (void *)this;
check_neighbor_intel();
if (_precision_mode == PREC_MODE_SINGLE)
_single_buffers->zero_ev();

View File

@ -26,18 +26,17 @@ IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
_buf_size(0), _buf_local_size(0) {
_list_alloc_atoms = 0;
_ntypes = 0;
_off_map_maxlocal = 0;
_off_map_listlocal = 0;
_ccachex = 0;
_host_nmax = 0;
#ifdef _LMP_INTEL_OFFLOAD
_separate_buffers = 0;
_off_f = 0;
_off_map_ilist = 0;
_off_map_nmax = 0;
_off_map_maxhead = 0;
_off_list_alloc = false;
_off_threads = 0;
_off_ccache = 0;
_host_nmax = 0;
#endif
}
@ -173,21 +172,15 @@ void IntelBuffers<flt_t, acc_t>::free_nmax()
const int * tag = _off_map_tag;
const int * special = _off_map_special;
const int * nspecial = _off_map_nspecial;
const int * bins = _off_map_bins;
const int * binpacked = _binpacked;
if (tag != 0 && special != 0 && nspecial !=0 && bins != 0) {
if (tag != 0 && special != 0 && nspecial !=0) {
#pragma offload_transfer target(mic:_cop) \
nocopy(tag:alloc_if(0) free_if(1)) \
nocopy(special,nspecial:alloc_if(0) free_if(1)) \
nocopy(bins,binpacked:alloc_if(0) free_if(1))
nocopy(special,nspecial:alloc_if(0) free_if(1))
}
_off_map_nmax = 0;
}
#endif
if (_host_nmax > 0) {
lmp->memory->destroy(_binpacked);
_host_nmax = 0;
}
#endif
}
/* ---------------------------------------------------------------------- */
@ -195,12 +188,11 @@ void IntelBuffers<flt_t, acc_t>::free_nmax()
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int offload_end)
{
#ifdef _LMP_INTEL_OFFLOAD
free_nmax();
int size = lmp->atom->nmax;
_host_nmax = size;
lmp->memory->create(_binpacked, _host_nmax, "_binpacked");
#ifdef _LMP_INTEL_OFFLOAD
if (!offload_end) return;
int *special, *nspecial;
int tag_length, special_length, nspecial_length;
@ -220,10 +212,7 @@ void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int offload_end)
else
tag_length = 1;
int *tag = lmp->atom->tag;
int *bins = lmp->neighbor->bins;
int * binpacked = _binpacked;
#pragma offload_transfer target(mic:_cop) \
nocopy(bins,binpacked:length(size) alloc_if(1) free_if(0)) \
nocopy(tag:length(tag_length) alloc_if(1) free_if(0)) \
nocopy(special:length(special_length) alloc_if(1) free_if(0)) \
nocopy(nspecial:length(nspecial_length) alloc_if(1) free_if(0))
@ -231,18 +220,16 @@ void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int offload_end)
_off_map_special = special;
_off_map_nspecial = nspecial;
_off_map_nmax = size;
_off_map_bins = bins;
#endif
}
/* ---------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_local()
void IntelBuffers<flt_t, acc_t>::free_list_local()
{
if (_off_map_maxlocal > 0) {
if (_off_map_listlocal > 0) {
int * cnumneigh = _cnumneigh;
int * atombin = _atombin;
#ifdef _LMP_INTEL_OFFLOAD
if (_off_map_ilist != NULL) {
const int * ilist = _off_map_ilist;
@ -250,40 +237,36 @@ void IntelBuffers<flt_t, acc_t>::free_local()
_off_map_ilist = NULL;
if (numneigh != 0 && ilist != 0) {
#pragma offload_transfer target(mic:_cop) \
nocopy(ilist,numneigh,cnumneigh,atombin:alloc_if(0) free_if(1))
nocopy(ilist,numneigh,cnumneigh:alloc_if(0) free_if(1))
}
}
#endif
lmp->memory->destroy(cnumneigh);
lmp->memory->destroy(atombin);
_off_map_maxlocal = 0;
_off_map_listlocal = 0;
}
}
/* ---------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_local(NeighList *list,
void IntelBuffers<flt_t, acc_t>::_grow_list_local(NeighList *list,
const int offload_end)
{
free_local();
free_list_local();
int size = list->get_maxlocal();
lmp->memory->create(_cnumneigh, size, "_cnumneigh");
lmp->memory->create(_atombin, size, "_atombin");
_off_map_maxlocal = size;
_off_map_listlocal = size;
#ifdef _LMP_INTEL_OFFLOAD
if (offload_end > 0) {
int * numneigh = list->numneigh;
int * ilist = list->ilist;
int * cnumneigh = _cnumneigh;
int * atombin = _atombin;
if (cnumneigh != 0 && atombin != 0) {
if (cnumneigh != 0) {
#pragma offload_transfer target(mic:_cop) \
nocopy(ilist:length(size) alloc_if(1) free_if(0)) \
nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \
nocopy(cnumneigh:length(size) alloc_if(1) free_if(0)) \
nocopy(atombin:length(size) alloc_if(1) free_if(0))
nocopy(cnumneigh:length(size) alloc_if(1) free_if(0))
}
_off_map_ilist = ilist;
_off_map_numneigh = numneigh;
@ -293,39 +276,6 @@ void IntelBuffers<flt_t, acc_t>::_grow_local(NeighList *list,
/* ---------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_binhead()
{
#ifdef _LMP_INTEL_OFFLOAD
if (_off_map_maxhead > 0) {
const int * binhead = _off_map_binhead;
if (binhead !=0) {
#pragma offload_transfer target(mic:_cop) \
nocopy(binhead:alloc_if(0) free_if(1))
}
_off_map_maxhead = 0;
}
#endif
}
/* ---------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_binhead()
{
#ifdef _LMP_INTEL_OFFLOAD
free_binhead();
int * binhead = lmp->neighbor->binhead;
const int maxhead = lmp->neighbor->maxhead;
#pragma offload_transfer target(mic:_cop) \
nocopy(binhead:length(maxhead+1) alloc_if(1) free_if(0))
_off_map_binhead = binhead;
_off_map_maxhead = maxhead;
#endif
}
/* ---------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::free_nbor_list()
{
@ -333,11 +283,8 @@ void IntelBuffers<flt_t, acc_t>::free_nbor_list()
#ifdef _LMP_INTEL_OFFLOAD
if (_off_list_alloc) {
int * list_alloc = _list_alloc;
int * stencil = _off_map_stencil;
if (list_alloc != 0 && stencil != 0) {
#pragma offload_transfer target(mic:_cop) \
nocopy(list_alloc:alloc_if(0) free_if(1))
}
_off_list_alloc = false;
}
#endif
@ -364,33 +311,16 @@ void IntelBuffers<flt_t, acc_t>::_grow_nbor_list(NeighList *list,
#ifdef _LMP_INTEL_OFFLOAD
if (offload_end > 0) {
int * list_alloc =_list_alloc;
int * stencil = list->stencil;
if (list_alloc != NULL) {
#pragma offload_transfer target(mic:_cop) \
in(stencil:length(list->maxstencil) alloc_if(1) free_if(0)) \
nocopy(list_alloc:length(list_alloc_size) alloc_if(1) free_if(0))
_off_map_stencil = stencil;
_off_list_alloc = true;
}
}
#endif
}
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::_grow_stencil(NeighList *list)
{
#ifdef _LMP_INTEL_OFFLOAD
int * stencil = _off_map_stencil;
#pragma offload_transfer target(mic:_cop) \
nocopy(stencil:alloc_if(0) free_if(1))
stencil = list->stencil;
#pragma offload_transfer target(mic:_cop) \
in(stencil:length(list->maxstencil) alloc_if(1) free_if(0))
_off_map_stencil = stencil;
#endif
}
/* ---------------------------------------------------------------------- */
template <class flt_t, class acc_t>
@ -544,7 +474,6 @@ double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
if (_off_f) tmem += fstride*_off_threads * sizeof(vec3_acc_t);
#endif
tmem += _off_map_maxlocal * sizeof(int) * 2;
tmem += (_list_alloc_atoms + _off_threads) * get_max_nbors() * sizeof(int);
tmem += _ntypes * _ntypes * sizeof(int);

View File

@ -61,50 +61,35 @@ class IntelBuffers {
}
void free_buffers();
void free_nmax();
inline void set_bininfo(int *atombin, int *binpacked)
{ _atombin = atombin; _binpacked = binpacked; }
inline void grow(const int nall, const int nlocal, const int nthreads,
const int offload_end) {
if (nall >= _buf_size || nlocal >= _buf_local_size)
_grow(nall, nlocal, nthreads, offload_end);
#ifdef _LMP_INTEL_OFFLOAD
if (lmp->atom->nmax > _host_nmax)
_grow_nmax(offload_end);
#endif
}
inline void free_all_nbor_buffers() {
free_nbor_list();
free_nmax();
free_binhead();
free_local();
free_list_local();
}
inline void grow_nbor(NeighList *list, const int nlocal, const int nthreads,
inline void grow_list(NeighList *list, const int nlocal, const int nthreads,
const int offload_end, const int pack_width=1) {
grow_local(list, offload_end);
grow_nmax(offload_end);
if (offload_end)
grow_binhead();
grow_list_local(list, offload_end);
grow_nbor_list(list, nlocal, nthreads, offload_end, pack_width);
}
void free_nmax();
inline void grow_nmax(const int offload_end) {
if (lmp->atom->nmax > _host_nmax)
_grow_nmax(offload_end);
}
void free_local();
inline void grow_local(NeighList *list, const int offload_end) {
if (list->get_maxlocal() > _off_map_maxlocal)
_grow_local(list, offload_end);
}
void free_binhead();
inline void grow_binhead() {
#ifdef _LMP_INTEL_OFFLOAD
if (lmp->neighbor->maxhead > _off_map_maxhead)
_grow_binhead();
#endif
void free_list_local();
inline void grow_list_local(NeighList *list, const int offload_end) {
if (list->get_maxlocal() > _off_map_listlocal)
_grow_list_local(list, offload_end);
}
void free_ccache();
@ -134,19 +119,15 @@ class IntelBuffers {
const int pack_width) {
if (nlocal > _list_alloc_atoms)
_grow_nbor_list(list, nlocal, nthreads, offload_end, pack_width);
#ifdef _LMP_INTEL_OFFLOAD
else if (offload_end > 0 && _off_map_stencil != list->stencil)
_grow_stencil(list);
#endif
}
void set_ntypes(const int ntypes);
inline int * firstneigh(const NeighList *list) { return _list_alloc; }
inline int * cnumneigh(const NeighList *list) { return _cnumneigh; }
inline int * get_atombin() { return _atombin; }
inline int * get_binpacked() { return _binpacked; }
inline atom_t * get_x(const int offload = 1) {
#ifdef _LMP_INTEL_OFFLOAD
if (_separate_buffers && offload == 0) return _host_x;
@ -271,13 +252,10 @@ class IntelBuffers {
flt_t *_q;
quat_t *_quat;
vec3_acc_t * _f;
int _off_threads, _off_map_maxlocal;
int _off_threads, _off_map_listlocal;
int _list_alloc_atoms;
int * _list_alloc;
int * _cnumneigh;
int * _atombin;
int * _binpacked;
int *_list_alloc, *_cnumneigh, *_atombin, *_binpacked;
flt_t **_cutneighsq;
int _ntypes;
@ -296,26 +274,24 @@ class IntelBuffers {
flt_t *_host_q;
quat_t *_host_quat;
vec3_acc_t *_off_f;
int _off_map_nmax, _off_map_maxhead, _cop, _off_ccache;
int _off_map_nmax, _cop, _off_ccache;
int *_off_map_ilist;
int *_off_map_stencil, *_off_map_special, *_off_map_nspecial, *_off_map_tag;
int *_off_map_binhead, *_off_map_bins, *_off_map_numneigh;
int *_off_map_special, *_off_map_nspecial, *_off_map_tag;
int *_off_map_numneigh;
bool _off_list_alloc;
int _need_tag;
int _need_tag, _host_nmax;
#endif
int _buf_size, _buf_local_size, _host_nmax;
int _buf_size, _buf_local_size;
_alignvar(acc_t _ev_global[8],64);
_alignvar(acc_t _ev_global_host[8],64);
void _grow(const int nall, const int nlocal, const int nthreads,
const int offload_end);
void _grow_nmax(const int offload_end);
void _grow_local(NeighList *list, const int offload_end);
void _grow_binhead();
void _grow_list_local(NeighList *list, const int offload_end);
void _grow_nbor_list(NeighList *list, const int nlocal, const int nthreads,
const int offload_end, const int pack_width);
void _grow_stencil(NeighList *list);
};
}

View File

@ -0,0 +1,253 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "nbin_intel.h"
#include "atom.h"
#include "group.h"
#include "domain.h"
#include "comm.h"
#include "update.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NBinIntel::NBinIntel(LAMMPS *lmp) : NBinStandard(lmp) {
int ifix = modify->find_fix("package_intel");
if (ifix < 0)
error->all(FLERR,
"The 'package intel' command is required for /intel styles");
_fix = static_cast<FixIntel *>(modify->fix[ifix]);
_precision_mode = _fix->precision();
_atombin = NULL;
_binpacked = NULL;
#ifdef _LMP_INTEL_OFFLOAD
_cop = _fix->coprocessor_number();
_offload_alloc = 0;
#endif
}
/* ---------------------------------------------------------------------- */
NBinIntel::~NBinIntel() {
#ifdef _LMP_INTEL_OFFLOAD
if (_offload_alloc) {
const int * binhead = this->binhead;
const int * bins = this->bins;
const int * _atombin = this->_atombin;
const int * _binpacked = this->_binpacked;
#pragma offload_transfer target(mic:_cop) \
nocopy(binhead,bins,_atombin,_binpacked:alloc_if(0) free_if(1))
}
#endif
}
/* ----------------------------------------------------------------------
setup for bin_atoms()
------------------------------------------------------------------------- */
void NBinIntel::bin_atoms_setup(int nall)
{
// binhead = per-bin vector, mbins in length
// add 1 bin for USER-INTEL package
if (mbins > maxbin) {
#ifdef _LMP_INTEL_OFFLOAD
if (_offload_alloc) {
const int * binhead = this->binhead;
#pragma offload_transfer target(mic:_cop) \
nocopy(binhead:alloc_if(0) free_if(1))
}
#endif
maxbin = mbins;
memory->destroy(binhead);
memory->create(binhead,maxbin+1,"neigh:binhead");
#ifdef _LMP_INTEL_OFFLOAD
if (_fix->offload_balance() != 0) {
int * binhead = this->binhead;
#pragma offload_transfer target(mic:_cop) \
nocopy(binhead:length(maxbin+1) alloc_if(1) free_if(0))
}
#endif
last_bin_memory = update->ntimestep;
}
// bins = per-atom vector
if (nall > maxatom) {
maxatom = nall;
#ifdef _LMP_INTEL_OFFLOAD
if (_offload_alloc) {
const int * bins = this->bins;
const int * _atombin = this->_atombin;
const int * _binpacked = this->_binpacked;
#pragma offload_transfer target(mic:_cop) \
nocopy(bins,_atombin,_binpacked:alloc_if(0) free_if(1))
}
#endif
memory->destroy(bins);
memory->destroy(_atombin);
memory->destroy(_binpacked);
memory->create(bins,maxatom,"neigh:bins");
memory->create(_atombin,maxatom,"neigh:bins");
memory->create(_binpacked,maxatom,"neigh:bins");
#ifdef _LMP_INTEL_OFFLOAD
if (_fix->offload_balance() != 0) {
const int * bins = this->bins;
const int * _atombin = this->_atombin;
const int * _binpacked = this->_binpacked;
#pragma offload_transfer target(mic:_cop) \
nocopy(bins,_atombin,_binpacked:length(maxatom) alloc_if(1) free_if(0))
_offload_alloc=1;
}
#endif
if (_precision_mode == FixIntel::PREC_MODE_MIXED)
_fix->get_mixed_buffers()->set_bininfo(_atombin,_binpacked);
else if (_precision_mode == FixIntel::PREC_MODE_SINGLE)
_fix->get_single_buffers()->set_bininfo(_atombin,_binpacked);
else
_fix->get_double_buffers()->set_bininfo(_atombin,_binpacked);
last_bin_memory = update->ntimestep;
}
last_bin = update->ntimestep;
}
/* ----------------------------------------------------------------------
bin owned and ghost atoms
------------------------------------------------------------------------- */
void NBinIntel::bin_atoms()
{
if (_precision_mode == FixIntel::PREC_MODE_MIXED)
bin_atoms(_fix->get_mixed_buffers());
else if (_precision_mode == FixIntel::PREC_MODE_SINGLE)
bin_atoms(_fix->get_single_buffers());
else
bin_atoms(_fix->get_double_buffers());
}
template <class flt_t, class acc_t>
void NBinIntel::bin_atoms(IntelBuffers<flt_t,acc_t> * buffers) {
const int nlocal = atom->nlocal;
const int nall = nlocal + atom->nghost;
const int aend = _fix->offload_end_neighbor();
// ---------- Sanity check for padding --------------
{
const flt_t dx = (INTEL_BIGP - bboxhi[0]);
const flt_t dy = (INTEL_BIGP - bboxhi[1]);
const flt_t dz = (INTEL_BIGP - bboxhi[2]);
if (dx * dx + dy * dy + dz * dz <
static_cast<flt_t>(neighbor->cutneighmaxsq))
error->one(FLERR,
"Intel package expects no atoms within cutoff of {1e15,1e15,1e15}.");
}
// ---------- Grow and cast/pack buffers -------------
_fix->start_watch(TIME_PACK);
buffers->grow(nall, atom->nlocal, comm->nthreads, aend);
ATOM_T biga;
biga.x = INTEL_BIGP;
biga.y = INTEL_BIGP;
biga.z = INTEL_BIGP;
biga.w = 1;
buffers->get_x()[nall] = biga;
const int nthreads = comm->nthreads;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(buffers)
#endif
{
int ifrom, ito, tid;
IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads,
sizeof(ATOM_T));
buffers->thr_pack(ifrom, ito, 0);
}
_fix->stop_watch(TIME_PACK);
// ---------- Bin Atoms -------------
_fix->start_watch(TIME_HOST_NEIGHBOR);
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const atombin = this->_atombin;
int * _noalias const binpacked = this->_binpacked;
const double sboxlo0 = bboxlo[0] + mbinxlo/bininvx;
const double sboxlo1 = bboxlo[1] + mbinylo/bininvy;
const double sboxlo2 = bboxlo[2] + mbinzlo/bininvz;
int i, ibin;
for (i = 0; i < mbins; i++) binhead[i] = -1;
int *mask = atom->mask;
if (includegroup) {
int bitmask = group->bitmask[includegroup];
for (i = nall-1; i >= nlocal; i--) {
if (mask[i] & bitmask) {
ibin = coord2bin(atom->x[i]);
bins[i] = binhead[ibin];
binhead[ibin] = i;
}
}
for (i = atom->nfirst-1; i >= 0; i--) {
ibin = coord2bin(atom->x[i]);
atombin[i] = ibin;
bins[i] = binhead[ibin];
binhead[ibin] = i;
}
} else {
for (i = nall-1; i >= nlocal; i--) {
ibin = coord2bin(atom->x[i]);
bins[i] = binhead[ibin];
binhead[ibin] = i;
}
for (i = nlocal-1; i >= 0; i--) {
ibin = coord2bin(atom->x[i]);
atombin[i]=ibin;
bins[i] = binhead[ibin];
binhead[ibin] = i;
}
}
int newhead = 0;
for (i = 0; i < mbins; i++) {
int j = binhead[i];
binhead[i] = newhead;
for ( ; j >= 0; j = bins[j])
binpacked[newhead++] = j;
}
binhead[mbins] = newhead;
}
/* ---------------------------------------------------------------------- */
bigint NBinIntel::memory_usage()
{
return NBinStandard::memory_usage() + maxatom*2*sizeof(int);
}

View File

@ -0,0 +1,69 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NBIN_CLASS
NBinStyle(intel,
NBinIntel,
NB_INTEL)
#else
#ifndef LMP_NBIN_INTEL_H
#define LMP_NBIN_INTEL_H
#include "nbin_standard.h"
#include "fix_intel.h"
#include "memory.h"
namespace LAMMPS_NS {
class NBinIntel : public NBinStandard {
public:
NBinIntel(class LAMMPS *);
~NBinIntel();
void bin_atoms_setup(int);
void bin_atoms();
int * get_binpacked() { return _binpacked; }
private:
FixIntel *_fix;
int *_atombin, *_binpacked;
int _precision_mode;
bigint memory_usage();
template <class flt_t, class acc_t>
void bin_atoms(IntelBuffers<flt_t,acc_t> *);
#ifdef _LMP_INTEL_OFFLOAD
int _cop, _offload_alloc;
#endif
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: The 'package intel' command is required for /intel styles
Self-explanatory.
E: Intel package expects no atoms within cutoff of {1e15,1e15,1e15}.
The Intel package can make use of dummy atoms for padding with a large position
that should not be within the cutoff.
*/

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,552 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_full_bin_intel.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "atom.h"
#include "comm.h"
#include "group.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NPairFullBinIntel::NPairFullBinIntel(LAMMPS *lmp) : NPairIntel(lmp) {}
/* ----------------------------------------------------------------------
binned neighbor list construction for all neighbors
every neighbor pair appears in list of both atoms i and j
------------------------------------------------------------------------- */
void NPairFullBinIntel::build(NeighList *list)
{
if (nstencil > INTEL_MAX_STENCIL_CHECK)
error->all(FLERR, "Too many neighbor bins for USER-INTEL package.");
#ifdef _LMP_INTEL_OFFLOAD
if (exclude)
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
#endif
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
fbi(list, _fix->get_mixed_buffers());
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
fbi(list, _fix->get_double_buffers());
else
fbi(list, _fix->get_single_buffers());
_fix->stop_watch(TIME_HOST_NEIGHBOR);
}
template <class flt_t, class acc_t>
void NPairFullBinIntel::
fbi(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
list->inum = nlocal;
list->gnum = 0;
int host_start = _fix->host_start_neighbor();;
const int off_end = _fix->offload_end_neighbor();
#ifdef _LMP_INTEL_OFFLOAD
if (off_end) grow_stencil();
if (_fix->full_host_list()) host_start = 0;
int offload_noghost = _fix->offload_noghost();
#endif
buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end,
_fix->nbor_pack_width());
int need_ic = 0;
if (atom->molecular)
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
neighbor->cutneighmax);
#ifdef _LMP_INTEL_OFFLOAD
if (need_ic) {
if (offload_noghost) {
fbi<flt_t,acc_t,1,1>(1, list, buffers, 0, off_end);
fbi<flt_t,acc_t,1,1>(0, list, buffers, host_start, nlocal, off_end);
} else {
fbi<flt_t,acc_t,0,1>(1, list, buffers, 0, off_end);
fbi<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal);
}
} else {
if (offload_noghost) {
fbi<flt_t,acc_t,1,0>(1, list, buffers, 0, off_end);
fbi<flt_t,acc_t,1,0>(0, list, buffers, host_start, nlocal, off_end);
} else {
fbi<flt_t,acc_t,0,0>(1, list, buffers, 0, off_end);
fbi<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal);
}
}
#else
if (need_ic)
fbi<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal);
else
fbi<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal);
#endif
}
template <class flt_t, class acc_t, int offload_noghost, int need_ic>
void NPairFullBinIntel::
fbi(const int offload, NeighList *list, IntelBuffers<flt_t,acc_t> *buffers,
const int astart, const int aend, const int offload_end) {
if (aend-astart == 0) return;
const int nall = atom->nlocal + atom->nghost;
int pad = 1;
int nall_t = nall;
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost && offload) nall_t = atom->nlocal;
#endif
const int pack_width = _fix->nbor_pack_width();
const int pad_width = pad;
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const firstneigh = buffers->firstneigh(list);
const int e_nall = nall_t;
const int molecular = atom->molecular;
int *ns = NULL;
tagint *s = NULL;
int tag_size = 0, special_size;
if (buffers->need_tag()) tag_size = e_nall;
if (molecular) {
s = atom->special[0];
ns = atom->nspecial[0];
special_size = aend;
} else {
s = &buffers->_special_holder;
ns = &buffers->_nspecial_holder;
special_size = 0;
}
const tagint * _noalias const special = s;
const int * _noalias const nspecial = ns;
const int maxspecial = atom->maxspecial;
const tagint * _noalias const tag = atom->tag;
int * _noalias const ilist = list->ilist;
int * _noalias numneigh = list->numneigh;
int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int nstencil = this->nstencil;
const int * _noalias const stencil = this->stencil;
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
const int ntypes = atom->ntypes + 1;
const int nlocal = atom->nlocal;
#ifndef _LMP_INTEL_OFFLOAD
int * const mask = atom->mask;
tagint * const molecule = atom->molecule;
#endif
int tnum;
int *overflow;
double *timer_compute;
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
timer_compute = _fix->off_watch_neighbor();
tnum = buffers->get_off_threads();
overflow = _fix->get_off_overflow_flag();
_fix->stop_watch(TIME_HOST_NEIGHBOR);
_fix->start_watch(TIME_OFFLOAD_LATENCY);
} else
#endif
{
tnum = comm->nthreads;
overflow = _fix->get_overflow_flag();
}
const int nthreads = tnum;
const int maxnbors = buffers->get_max_nbors();
int * _noalias const atombin = buffers->get_atombin();
const int * _noalias const binpacked = buffers->get_binpacked();
const int xperiodic = domain->xperiodic;
const int yperiodic = domain->yperiodic;
const int zperiodic = domain->zperiodic;
const flt_t xprd_half = domain->xprd_half;
const flt_t yprd_half = domain->yprd_half;
const flt_t zprd_half = domain->zprd_half;
#ifdef _LMP_INTEL_OFFLOAD
const int * _noalias const binhead = this->binhead;
const int * _noalias const bins = this->bins;
const int cop = _fix->coprocessor_number();
const int separate_buffers = _fix->separate_buffers();
#pragma offload target(mic:cop) if(offload) \
in(x:length(e_nall+1) alloc_if(0) free_if(0)) \
in(tag:length(tag_size) alloc_if(0) free_if(0)) \
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
out(numneigh:length(0) alloc_if(0) free_if(0)) \
in(ilist:length(0) alloc_if(0) free_if(0)) \
in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload,pack_width) \
in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \
in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(tag)
#endif
{
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime();
#endif
#ifdef _LMP_INTEL_OFFLOAD
overflow[LMP_LOCAL_MIN] = astart;
overflow[LMP_LOCAL_MAX] = aend - 1;
overflow[LMP_GHOST_MIN] = e_nall;
overflow[LMP_GHOST_MAX] = -1;
#endif
int nstencilp = 0;
int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL];
for (int k = 0; k < nstencil; k++) {
binstart[nstencilp] = stencil[k];
int end = stencil[k] + 1;
for (int kk = k + 1; kk < nstencil; kk++) {
if (stencil[kk-1]+1 == stencil[kk]) {
end++;
k++;
} else break;
}
binend[nstencilp] = end;
nstencilp++;
}
#if defined(_OPENMP)
#pragma omp parallel default(none) \
shared(numneigh, overflow, nstencilp, binstart, binend)
#endif
{
#ifdef _LMP_INTEL_OFFLOAD
int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1;
#endif
const int num = aend - astart;
int tid, ifrom, ito;
IP_PRE_omp_range_id_vec(ifrom, ito, tid, num, nthreads, pack_width);
ifrom += astart;
ito += astart;
int e_ito = ito;
if (ito == num) {
int imod = ito % pack_width;
if (imod) e_ito += pack_width - imod;
}
const int list_size = (e_ito + tid * 2 + 2) * maxnbors;
int which;
int pack_offset = maxnbors * pack_width;
int ct = (ifrom + tid * 2) * maxnbors;
int *neighptr = firstneigh + ct;
const int obound = pack_offset + maxnbors * 2;
int max_chunk = 0;
int lane = 0;
for (int i = ifrom; i < ito; i++) {
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
const int itype = x[i].w;
const tagint itag = tag[i];
const int ioffset = ntypes * itype;
const int ibin = atombin[i];
int raw_count = pack_offset;
// loop over all atoms in surrounding bins in stencil including self
// skip i = j
if (exclude) {
for (int k = 0; k < nstencilp; k++) {
const int bstart = binhead[ibin + binstart[k]];
const int bend = binhead[ibin + binend[k]];
#ifndef _LMP_INTEL_OFFLOAD
#ifdef INTEL_VMASK
#pragma simd
#endif
#endif
for (int jj = bstart; jj < bend; jj++) {
int j = binpacked[jj];
if (i == j) j=e_nall;
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
if (j < nlocal) {
if (i < offload_end) continue;
} else if (offload) continue;
}
#endif
#ifndef _LMP_INTEL_OFFLOAD
const int jtype = x[j].w;
if (exclusion(i,j,itype,jtype,mask,molecule)) continue;
#endif
neighptr[raw_count++] = j;
}
}
} else {
for (int k = 0; k < nstencilp; k++) {
const int bstart = binhead[ibin + binstart[k]];
const int bend = binhead[ibin + binend[k]];
#ifndef _LMP_INTEL_OFFLOAD
#ifdef INTEL_VMASK
#pragma simd
#endif
#endif
for (int jj = bstart; jj < bend; jj++) {
int j = binpacked[jj];
if (i == j) j=e_nall;
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
if (j < nlocal) {
if (i < offload_end) continue;
} else if (offload) continue;
}
#endif
neighptr[raw_count++] = j;
}
}
}
if (raw_count > obound) *overflow = 1;
#if defined(LMP_SIMD_COMPILER)
#ifdef _LMP_INTEL_OFFLOAD
int vlmin = lmin, vlmax = lmax, vgmin = gmin, vgmax = gmax;
#if __INTEL_COMPILER+0 > 1499
#pragma vector aligned
#pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin)
#endif
#else
#pragma vector aligned
#pragma simd
#endif
#endif
for (int u = pack_offset; u < raw_count; u++) {
int j = neighptr[u];
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const int jtype = x[j].w;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq > cutneighsq[ioffset + jtype])
neighptr[u] = e_nall;
else {
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[u] = -j - 1;
}
#ifdef _LMP_INTEL_OFFLOAD
if (j < nlocal) {
if (j < vlmin) vlmin = j;
if (j > vlmax) vlmax = j;
} else {
if (j < vgmin) vgmin = j;
if (j > vgmax) vgmax = j;
}
#endif
}
}
#ifdef _LMP_INTEL_OFFLOAD
lmin = MIN(lmin,vlmin);
gmin = MIN(gmin,vgmin);
lmax = MAX(lmax,vlmax);
gmax = MAX(gmax,vgmax);
#endif
int n = lane, n2 = pack_offset;
for (int u = pack_offset; u < raw_count; u++) {
const int j = neighptr[u];
int pj = j;
if (pj < e_nall) {
if (need_ic)
if (pj < 0) pj = -pj - 1;
const int jtag = tag[pj];
int flist = 0;
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) flist = 1;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) flist = 1;
} else {
if (x[pj].z < ztmp) flist = 1;
else if (x[pj].z == ztmp && x[pj].y < ytmp) flist = 1;
else if (x[pj].z == ztmp && x[pj].y == ytmp && x[pj].x < xtmp)
flist = 1;
}
if (flist) {
neighptr[n2++] = j;
} else {
neighptr[n] = j;
n += pack_width;
}
}
}
int ns = (n - lane) / pack_width;
atombin[i] = ns;
for (int u = pack_offset; u < n2; u++) {
neighptr[n] = neighptr[u];
n += pack_width;
}
ilist[i] = i;
cnumneigh[i] = ct + lane;
ns += n2 - pack_offset;
numneigh[i] = ns;
if (ns > max_chunk) max_chunk = ns;
lane++;
if (lane == pack_width) {
ct += max_chunk * pack_width;
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
const int edge = (ct % alignb);
if (edge) ct += alignb - edge;
neighptr = firstneigh + ct;
max_chunk = 0;
pack_offset = maxnbors * pack_width;
lane = 0;
if (ct + obound > list_size) {
if (i < ito - 1) {
*overflow = 1;
ct = (ifrom + tid * 2) * maxnbors;
}
}
}
}
if (*overflow == 1)
for (int i = ifrom; i < ito; i++)
numneigh[i] = 0;
#ifdef _LMP_INTEL_OFFLOAD
if (separate_buffers) {
#if defined(_OPENMP)
#pragma omp critical
#endif
{
if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin;
if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax;
if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin;
if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax;
}
#pragma omp barrier
}
int ghost_offset = 0, nall_offset = e_nall;
if (separate_buffers) {
int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
if (nghost < 0) nghost = 0;
if (offload) {
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
} else {
ghost_offset = overflow[LMP_GHOST_MIN] - nlocal;
nall_offset = nlocal + nghost;
}
}
#endif
if (molecular) {
for (int i = ifrom; i < ito; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
const int trip = jnum * pack_width;
for (int jj = 0; jj < trip; jj+=pack_width) {
const int j = jlist[jj];
if (need_ic && j < 0) {
which = 0;
jlist[jj] = -j - 1;
} else
ofind_special(which, special, nspecial, i, tag[j]);
#ifdef _LMP_INTEL_OFFLOAD
if (j >= nlocal) {
if (j == e_nall)
jlist[jj] = nall_offset;
else if (which)
jlist[jj] = (j-ghost_offset) ^ (which << SBBITS);
else jlist[jj]-=ghost_offset;
} else
#endif
if (which) jlist[jj] = j ^ (which << SBBITS);
}
}
}
#ifdef _LMP_INTEL_OFFLOAD
else if (separate_buffers) {
for (int i = ifrom; i < ito; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
int jj = 0;
for (jj = 0; jj < jnum; jj++) {
if (jlist[jj] >= nlocal) {
if (jlist[jj] == e_nall) jlist[jj] = nall_offset;
else jlist[jj] -= ghost_offset;
}
}
}
}
#endif
} // end omp
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime() - *timer_compute;
#endif
} // end offload
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
_fix->start_watch(TIME_HOST_NEIGHBOR);
for (int n = 0; n < aend; n++) {
ilist[n] = n;
numneigh[n] = 0;
}
} else {
for (int i = astart; i < aend; i++)
list->firstneigh[i] = firstneigh + cnumneigh[i];
if (separate_buffers) {
_fix->start_watch(TIME_PACK);
_fix->set_neighbor_host_sizes();
buffers->pack_sep_from_single(_fix->host_min_local(),
_fix->host_used_local(),
_fix->host_min_ghost(),
_fix->host_used_ghost());
_fix->stop_watch(TIME_PACK);
}
}
#else
for (int i = astart; i < aend; i++)
list->firstneigh[i] = firstneigh + cnumneigh[i];
#endif
}

View File

@ -0,0 +1,51 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(full/bin/intel,
NPairFullBinIntel,
NP_FULL | NP_BIN | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI |
NP_INTEL)
#else
#ifndef LMP_NPAIR_FULL_BIN_INTEL_H
#define LMP_NPAIR_FULL_BIN_INTEL_H
#include "npair_intel.h"
#include "fix_intel.h"
namespace LAMMPS_NS {
class NPairFullBinIntel : public NPairIntel {
public:
NPairFullBinIntel(class LAMMPS *);
~NPairFullBinIntel() {}
void build(class NeighList *);
private:
template <class flt_t, class acc_t>
void fbi(NeighList *, IntelBuffers<flt_t,acc_t> *);
template <class flt_t, class acc_t, int, int>
void fbi(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, const int,
const int, const int offload_end = 0);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,451 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_half_bin_newtoff_intel.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "atom.h"
#include "comm.h"
#include "group.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NPairHalfBinNewtoffIntel::NPairHalfBinNewtoffIntel(LAMMPS *lmp) :
NPairIntel(lmp) {}
/* ----------------------------------------------------------------------
binned neighbor list construction with partial Newton's 3rd law
each owned atom i checks own bin and other bins in stencil
pair stored once if i,j are both owned and i < j
pair stored by me if j is ghost (also stored by proc owning j)
------------------------------------------------------------------------- */
void NPairHalfBinNewtoffIntel::build(NeighList *list)
{
if (nstencil > INTEL_MAX_STENCIL_CHECK)
error->all(FLERR, "Too many neighbor bins for USER-INTEL package.");
#ifdef _LMP_INTEL_OFFLOAD
if (exclude)
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
#endif
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
hbnni(list, _fix->get_mixed_buffers());
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
hbnni(list, _fix->get_double_buffers());
else
hbnni(list, _fix->get_single_buffers());
_fix->stop_watch(TIME_HOST_NEIGHBOR);
}
template <class flt_t, class acc_t>
void NPairHalfBinNewtoffIntel::
hbnni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
list->inum = nlocal;
const int off_end = _fix->offload_end_neighbor();
int host_start = off_end;;
#ifdef _LMP_INTEL_OFFLOAD
if (off_end) grow_stencil();
if (_fix->full_host_list()) host_start = 0;
#endif
buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end);
int need_ic = 0;
if (atom->molecular)
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
neighbor->cutneighmax);
#ifdef _LMP_INTEL_OFFLOAD
if (need_ic) {
hbnni<flt_t,acc_t,1>(1, list, buffers, 0, off_end);
hbnni<flt_t,acc_t,1>(0, list, buffers, host_start, nlocal);
} else {
hbnni<flt_t,acc_t,0>(1, list, buffers, 0, off_end);
hbnni<flt_t,acc_t,0>(0, list, buffers, host_start, nlocal);
}
#else
if (need_ic)
hbnni<flt_t,acc_t,1>(0, list, buffers, host_start, nlocal);
else
hbnni<flt_t,acc_t,0>(0, list, buffers, host_start, nlocal);
#endif
}
template <class flt_t, class acc_t, int need_ic>
void NPairHalfBinNewtoffIntel::
hbnni(const int offload, NeighList *list, IntelBuffers<flt_t,acc_t> *buffers,
const int astart, const int aend) {
if (aend-astart == 0) return;
const int nall = atom->nlocal + atom->nghost;
int pad = 1;
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
if (INTEL_MIC_NBOR_PAD > 1)
pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t);
} else
#endif
if (INTEL_NBOR_PAD > 1)
pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t);
const int pad_width = pad;
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const firstneigh = buffers->firstneigh(list);
const int molecular = atom->molecular;
int *ns = NULL;
tagint *s = NULL;
int tag_size = 0, special_size;
if (buffers->need_tag()) tag_size = nall;
if (molecular) {
s = atom->special[0];
ns = atom->nspecial[0];
special_size = aend;
} else {
s = &buffers->_special_holder;
ns = &buffers->_nspecial_holder;
special_size = 0;
}
const tagint * _noalias const special = s;
const int * _noalias const nspecial = ns;
const int maxspecial = atom->maxspecial;
const tagint * _noalias const tag = atom->tag;
int * _noalias const ilist = list->ilist;
int * _noalias numneigh = list->numneigh;
int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int nstencil = this->nstencil;
const int * _noalias const stencil = this->stencil;
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
const int ntypes = atom->ntypes + 1;
const int nlocal = atom->nlocal;
#ifndef _LMP_INTEL_OFFLOAD
int * const mask = atom->mask;
tagint * const molecule = atom->molecule;
#endif
int tnum;
int *overflow;
double *timer_compute;
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
timer_compute = _fix->off_watch_neighbor();
tnum = buffers->get_off_threads();
overflow = _fix->get_off_overflow_flag();
_fix->stop_watch(TIME_HOST_NEIGHBOR);
_fix->start_watch(TIME_OFFLOAD_LATENCY);
} else
#endif
{
tnum = comm->nthreads;
overflow = _fix->get_overflow_flag();
}
const int nthreads = tnum;
const int maxnbors = buffers->get_max_nbors();
int * _noalias const atombin = buffers->get_atombin();
const int * _noalias const binpacked = buffers->get_binpacked();
const int xperiodic = domain->xperiodic;
const int yperiodic = domain->yperiodic;
const int zperiodic = domain->zperiodic;
const flt_t xprd_half = domain->xprd_half;
const flt_t yprd_half = domain->yprd_half;
const flt_t zprd_half = domain->zprd_half;
#ifdef _LMP_INTEL_OFFLOAD
const int * _noalias const binhead = this->binhead;
const int * _noalias const bins = this->bins;
const int cop = _fix->coprocessor_number();
const int separate_buffers = _fix->separate_buffers();
#pragma offload target(mic:cop) if(offload) \
in(x:length(nall+1) alloc_if(0) free_if(0)) \
in(tag:length(tag_size) alloc_if(0) free_if(0)) \
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
out(numneigh:length(0) alloc_if(0) free_if(0)) \
in(ilist:length(0) alloc_if(0) free_if(0)) \
in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(maxnbors,nthreads,maxspecial,nstencil,pad_width,offload,nall) \
in(separate_buffers, astart, aend, nlocal, molecular, ntypes) \
in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(tag)
#endif
{
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime();
#endif
#ifdef _LMP_INTEL_OFFLOAD
overflow[LMP_LOCAL_MIN] = astart;
overflow[LMP_LOCAL_MAX] = aend - 1;
overflow[LMP_GHOST_MIN] = nall;
overflow[LMP_GHOST_MAX] = -1;
#endif
int nstencilp = 0;
int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL];
for (int k = 0; k < nstencil; k++) {
binstart[nstencilp] = stencil[k];
int end = stencil[k] + 1;
for (int kk = k + 1; kk < nstencil; kk++) {
if (stencil[kk-1]+1 == stencil[kk]) {
end++;
k++;
} else break;
}
binend[nstencilp] = end;
nstencilp++;
}
#if defined(_OPENMP)
#pragma omp parallel default(none) \
shared(numneigh, overflow, nstencilp, binstart, binend)
#endif
{
#ifdef _LMP_INTEL_OFFLOAD
int lmin = nall, lmax = -1, gmin = nall, gmax = -1;
#endif
const int num = aend - astart;
int tid, ifrom, ito;
IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads);
ifrom += astart;
ito += astart;
int which;
const int list_size = (ito + tid + 1) * maxnbors;
int ct = (ifrom + tid) * maxnbors;
int *neighptr = firstneigh + ct;
for (int i = ifrom; i < ito; i++) {
int j, k, n, n2, itype, jtype, ibin;
double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
n = 0;
n2 = maxnbors;
xtmp = x[i].x;
ytmp = x[i].y;
ztmp = x[i].z;
itype = x[i].w;
const int ioffset = ntypes*itype;
// loop over all atoms in other bins in stencil including self
// only store pair if i < j
// stores own/own pairs only once
// stores own/ghost pairs on both procs
ibin = atombin[i];
for (k = 0; k < nstencilp; k++) {
const int bstart = binhead[ibin + binstart[k]];
const int bend = binhead[ibin + binend[k]];
for (int jj = bstart; jj < bend; jj++) {
const int j = binpacked[jj];
if (j <= i) continue;
jtype = x[j].w;
#ifndef _LMP_INTEL_OFFLOAD
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
#endif
delx = xtmp - x[j].x;
dely = ytmp - x[j].y;
delz = ztmp - x[j].z;
rsq = delx * delx + dely * dely + delz * delz;
if (rsq <= cutneighsq[ioffset + jtype]) {
if (j < nlocal) {
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[n++] = -j - 1;
else
neighptr[n++] = j;
} else
neighptr[n++] = j;
#ifdef _LMP_INTEL_OFFLOAD
if (j < lmin) lmin = j;
if (j > lmax) lmax = j;
#endif
} else {
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[n2++] = -j - 1;
else
neighptr[n2++] = j;
} else
neighptr[n2++] = j;
#ifdef _LMP_INTEL_OFFLOAD
if (j < gmin) gmin = j;
if (j > gmax) gmax = j;
#endif
}
}
}
}
ilist[i] = i;
cnumneigh[i] = ct;
if (n > maxnbors) *overflow = 1;
for (k = maxnbors; k < n2; k++) neighptr[n++] = neighptr[k];
const int edge = (n % pad_width);
if (edge) {
const int pad_end = n + (pad_width - edge);
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min=1, max=15, avg=8
#endif
for ( ; n < pad_end; n++)
neighptr[n] = nall;
}
numneigh[i] = n;
while((n % (INTEL_DATA_ALIGN / sizeof(int))) != 0) n++;
ct += n;
neighptr += n;
if (ct + n + maxnbors > list_size) {
*overflow = 1;
ct = (ifrom + tid) * maxnbors;
}
}
if (*overflow == 1)
for (int i = ifrom; i < ito; i++)
numneigh[i] = 0;
#ifdef _LMP_INTEL_OFFLOAD
if (separate_buffers) {
#if defined(_OPENMP)
#pragma omp critical
#endif
{
if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin;
if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax;
if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin;
if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax;
}
#pragma omp barrier
}
int ghost_offset = 0, nall_offset = nall;
if (separate_buffers) {
int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
if (nghost < 0) nghost = 0;
if (offload) {
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
} else {
ghost_offset = overflow[LMP_GHOST_MIN] - nlocal;
nall_offset = nlocal + nghost;
}
}
#endif
if (molecular) {
for (int i = ifrom; i < ito; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj];
if (need_ic && j < 0) {
which = 0;
jlist[jj] = -j - 1;
} else
ofind_special(which, special, nspecial, i, tag[j]);
#ifdef _LMP_INTEL_OFFLOAD
if (j >= nlocal) {
if (j == nall)
jlist[jj] = nall_offset;
else if (which)
jlist[jj] = (j-ghost_offset) ^ (which << SBBITS);
else jlist[jj]-=ghost_offset;
} else
#endif
if (which) jlist[jj] = j ^ (which << SBBITS);
}
}
}
#ifdef _LMP_INTEL_OFFLOAD
else if (separate_buffers) {
for (int i = ifrom; i < ito; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
int jj = 0;
for (jj = 0; jj < jnum; jj++)
if (jlist[jj] >= nlocal) break;
while (jj < jnum) {
if (jlist[jj] == nall) jlist[jj] = nall_offset;
else jlist[jj] -= ghost_offset;
jj++;
}
}
}
#endif
} // end omp
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime() - *timer_compute;
#endif
} // end offload
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
_fix->start_watch(TIME_HOST_NEIGHBOR);
for (int n = 0; n < aend; n++) {
ilist[n] = n;
numneigh[n] = 0;
}
} else {
for (int i = astart; i < aend; i++)
list->firstneigh[i] = firstneigh + cnumneigh[i];
if (separate_buffers) {
_fix->start_watch(TIME_PACK);
_fix->set_neighbor_host_sizes();
buffers->pack_sep_from_single(_fix->host_min_local(),
_fix->host_used_local(),
_fix->host_min_ghost(),
_fix->host_used_ghost());
_fix->stop_watch(TIME_PACK);
}
}
#else
for (int i = astart; i < aend; i++)
list->firstneigh[i] = firstneigh + cnumneigh[i];
#endif
}

View File

@ -0,0 +1,52 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(half/bin/newtoff/intel,
NPairHalfBinNewtoffIntel,
NP_HALF | NP_BIN | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_INTEL)
#else
#ifndef LMP_NPAIR_HALF_BIN_NEWTOFF_INTEL_H
#define LMP_NPAIR_HALF_BIN_NEWTOFF_INTEL_H
#include "npair_intel.h"
#include "fix_intel.h"
namespace LAMMPS_NS {
class NPairHalfBinNewtoffIntel : public NPairIntel {
public:
NPairHalfBinNewtoffIntel(class LAMMPS *);
~NPairHalfBinNewtoffIntel() {}
void build(class NeighList *);
private:
template <class flt_t, class acc_t>
void hbnni(NeighList *, IntelBuffers<flt_t,acc_t> *);
template <class flt_t, class acc_t, int>
void hbnni(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, const int,
const int);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,610 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_half_bin_newton_intel.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "atom.h"
#include "comm.h"
#include "group.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NPairHalfBinNewtonIntel::NPairHalfBinNewtonIntel(LAMMPS *lmp) :
NPairIntel(lmp) {}
/* ----------------------------------------------------------------------
binned neighbor list construction with full Newton's 3rd law
each owned atom i checks its own bin and other bins in Newton stencil
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
void NPairHalfBinNewtonIntel::build(NeighList *list)
{
if (nstencil / 2 > INTEL_MAX_STENCIL_CHECK)
error->all(FLERR, "Too many neighbor bins for USER-INTEL package.");
#ifdef _LMP_INTEL_OFFLOAD
if (exclude)
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
#endif
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
hbni(list, _fix->get_mixed_buffers());
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
hbni(list, _fix->get_double_buffers());
else
hbni(list, _fix->get_single_buffers());
_fix->stop_watch(TIME_HOST_NEIGHBOR);
}
template <class flt_t, class acc_t>
void NPairHalfBinNewtonIntel::
hbni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
list->inum = nlocal;
int host_start = _fix->host_start_neighbor();
const int off_end = _fix->offload_end_neighbor();
#ifdef _LMP_INTEL_OFFLOAD
if (off_end) grow_stencil();
if (_fix->full_host_list()) host_start = 0;
int offload_noghost = _fix->offload_noghost();
#endif
buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end);
int need_ic = 0;
if (atom->molecular)
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
neighbor->cutneighmax);
#ifdef _LMP_INTEL_OFFLOAD
if (need_ic) {
if (offload_noghost) {
hbni<flt_t,acc_t,1,1>(1, list, buffers, 0, off_end);
hbni<flt_t,acc_t,1,1>(0, list, buffers, host_start, nlocal, off_end);
} else {
hbni<flt_t,acc_t,0,1>(1, list, buffers, 0, off_end);
hbni<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal);
}
} else {
if (offload_noghost) {
hbni<flt_t,acc_t,1,0>(1, list, buffers, 0, off_end);
hbni<flt_t,acc_t,1,0>(0, list, buffers, host_start, nlocal, off_end);
} else {
hbni<flt_t,acc_t,0,0>(1, list, buffers, 0, off_end);
hbni<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal);
}
}
#else
if (need_ic)
hbni<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal);
else
hbni<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal);
#endif
}
template <class flt_t, class acc_t, int offload_noghost, int need_ic>
void NPairHalfBinNewtonIntel::
hbni(const int offload, NeighList *list, IntelBuffers<flt_t,acc_t> *buffers,
const int astart, const int aend, const int offload_end) {
if (aend-astart == 0) return;
const int nall = atom->nlocal + atom->nghost;
int pad = 1;
int nall_t = nall;
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost && offload) nall_t = atom->nlocal;
if (offload) {
if (INTEL_MIC_NBOR_PAD > 1)
pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t);
} else
#endif
if (INTEL_NBOR_PAD > 1)
pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t);
const int pad_width = pad;
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const firstneigh = buffers->firstneigh(list);
const int e_nall = nall_t;
const int molecular = atom->molecular;
int *ns = NULL;
tagint *s = NULL;
int tag_size = 0, special_size;
if (buffers->need_tag()) tag_size = e_nall;
if (molecular) {
s = atom->special[0];
ns = atom->nspecial[0];
special_size = aend;
} else {
s = &buffers->_special_holder;
ns = &buffers->_nspecial_holder;
special_size = 0;
}
const tagint * _noalias const special = s;
const int * _noalias const nspecial = ns;
const int maxspecial = atom->maxspecial;
const tagint * _noalias const tag = atom->tag;
int * _noalias const ilist = list->ilist;
int * _noalias numneigh = list->numneigh;
int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int nstencil = this->nstencil;
const int * _noalias const stencil = this->stencil;
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
const int ntypes = atom->ntypes + 1;
const int nlocal = atom->nlocal;
#ifndef _LMP_INTEL_OFFLOAD
int * const mask = atom->mask;
tagint * const molecule = atom->molecule;
#endif
int tnum;
int *overflow;
double *timer_compute;
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
timer_compute = _fix->off_watch_neighbor();
tnum = buffers->get_off_threads();
overflow = _fix->get_off_overflow_flag();
_fix->stop_watch(TIME_HOST_NEIGHBOR);
_fix->start_watch(TIME_OFFLOAD_LATENCY);
} else
#endif
{
tnum = comm->nthreads;
overflow = _fix->get_overflow_flag();
}
const int nthreads = tnum;
const int maxnbors = buffers->get_max_nbors();
int * _noalias const atombin = buffers->get_atombin();
const int * _noalias const binpacked = buffers->get_binpacked();
const int xperiodic = domain->xperiodic;
const int yperiodic = domain->yperiodic;
const int zperiodic = domain->zperiodic;
const flt_t xprd_half = domain->xprd_half;
const flt_t yprd_half = domain->yprd_half;
const flt_t zprd_half = domain->zprd_half;
#ifdef _LMP_INTEL_OFFLOAD
const int * _noalias const binhead = this->binhead;
const int * _noalias const bins = this->bins;
const int cop = _fix->coprocessor_number();
const int separate_buffers = _fix->separate_buffers();
#pragma offload target(mic:cop) if(offload) \
in(x:length(e_nall+1) alloc_if(0) free_if(0)) \
in(tag:length(tag_size) alloc_if(0) free_if(0)) \
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
out(numneigh:length(0) alloc_if(0) free_if(0)) \
in(ilist:length(0) alloc_if(0) free_if(0)) \
in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload,pad_width) \
in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \
in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(tag)
#endif
{
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime();
#endif
#ifdef _LMP_INTEL_OFFLOAD
overflow[LMP_LOCAL_MIN] = astart;
overflow[LMP_LOCAL_MAX] = aend - 1;
overflow[LMP_GHOST_MIN] = e_nall;
overflow[LMP_GHOST_MAX] = -1;
#endif
int nstencilp = 0;
int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL];
for (int k = 0; k < nstencil; k++) {
binstart[nstencilp] = stencil[k];
int end = stencil[k] + 1;
for (int kk = k + 1; kk < nstencil; kk++) {
if (stencil[kk-1]+1 == stencil[kk]) {
end++;
k++;
} else break;
}
binend[nstencilp] = end;
nstencilp++;
}
#if defined(_OPENMP)
#pragma omp parallel default(none) \
shared(numneigh, overflow, nstencilp, binstart, binend)
#endif
{
#ifdef _LMP_INTEL_OFFLOAD
int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1;
#endif
const int num = aend - astart;
int tid, ifrom, ito;
#ifdef OUTER_CHUNK
const int swidth = ip_simd::SIMD_type<flt_t>::width();
IP_PRE_omp_range_id_vec(ifrom, ito, tid, num, nthreads, swidth);
ifrom += astart;
ito += astart;
int e_ito = ito;
if (ito == num) {
int imod = ito % swidth;
if (imod) e_ito += swidth - imod;
}
const int list_size = (e_ito + tid * 2 + 2) * maxnbors;
#else
const int swidth = 1;
IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads);
ifrom += astart;
ito += astart;
const int list_size = (ito + tid * 2 + 2) * maxnbors;
#endif
int which;
int pack_offset = maxnbors * swidth;
int ct = (ifrom + tid * 2) * maxnbors;
int *neighptr = firstneigh + ct;
const int obound = pack_offset + maxnbors * 2;
int max_chunk = 0;
int lane = 0;
for (int i = ifrom; i < ito; i++) {
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
const int itype = x[i].w;
const int ioffset = ntypes * itype;
// loop over rest of atoms in i's bin, ghosts are at end of linked list
// if j is owned atom, store it, since j is beyond i in linked list
// if j is ghost, only store if j coords are "above/to the right" of i
int raw_count = pack_offset;
for (int j = bins[i]; j >= 0; j = bins[j]) {
if (j >= nlocal) {
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost && offload) continue;
#endif
if (x[j].z < ztmp) continue;
if (x[j].z == ztmp) {
if (x[j].y < ytmp) continue;
if (x[j].y == ytmp && x[j].x < xtmp) continue;
}
}
#ifdef _LMP_INTEL_OFFLOAD
else if (offload_noghost && i < offload_end) continue;
#endif
#ifndef _LMP_INTEL_OFFLOAD
if (exclude) {
const int jtype = x[j].w;
if (exclusion(i,j,itype,jtype,mask,molecule)) continue;
}
#endif
neighptr[raw_count++] = j;
}
// loop over all atoms in other bins in stencil, store every pair
const int ibin = atombin[i];
if (exclude) {
for (int k = 0; k < nstencilp; k++) {
const int bstart = binhead[ibin + binstart[k]];
const int bend = binhead[ibin + binend[k]];
#ifndef _LMP_INTEL_OFFLOAD
#ifdef INTEL_VMASK
#pragma simd
#endif
#endif
for (int jj = bstart; jj < bend; jj++) {
const int j = binpacked[jj];
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
if (j < nlocal) {
if (i < offload_end) continue;
} else if (offload) continue;
}
#endif
#ifndef _LMP_INTEL_OFFLOAD
const int jtype = x[j].w;
if (exclusion(i,j,itype,jtype,mask,molecule)) continue;
#endif
neighptr[raw_count++] = j;
}
}
} else {
for (int k = 0; k < nstencilp; k++) {
const int bstart = binhead[ibin + binstart[k]];
const int bend = binhead[ibin + binend[k]];
#ifndef _LMP_INTEL_OFFLOAD
#ifdef INTEL_VMASK
#pragma simd
#endif
#endif
for (int jj = bstart; jj < bend; jj++) {
const int j = binpacked[jj];
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
if (j < nlocal) {
if (i < offload_end) continue;
} else if (offload) continue;
}
#endif
neighptr[raw_count++] = j;
}
}
}
if (raw_count > obound) *overflow = 1;
#if defined(LMP_SIMD_COMPILER)
#ifdef _LMP_INTEL_OFFLOAD
int vlmin = lmin, vlmax = lmax, vgmin = gmin, vgmax = gmax;
#if __INTEL_COMPILER+0 > 1499
#pragma vector aligned
#pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin)
#endif
#else
#pragma vector aligned
#pragma simd
#endif
#endif
for (int u = pack_offset; u < raw_count; u++) {
int j = neighptr[u];
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const int jtype = x[j].w;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq > cutneighsq[ioffset + jtype])
neighptr[u] = e_nall;
else {
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[u] = -j - 1;
}
#ifdef _LMP_INTEL_OFFLOAD
if (j < nlocal) {
if (j < vlmin) vlmin = j;
if (j > vlmax) vlmax = j;
} else {
if (j < vgmin) vgmin = j;
if (j > vgmax) vgmax = j;
}
#endif
}
}
#ifdef _LMP_INTEL_OFFLOAD
lmin = MIN(lmin,vlmin);
gmin = MIN(gmin,vgmin);
lmax = MAX(lmax,vlmax);
gmax = MAX(gmax,vgmax);
#endif
int n = lane, n2 = pack_offset;
for (int u = pack_offset; u < raw_count; u++) {
const int j = neighptr[u];
int pj = j;
if (pj < e_nall) {
if (need_ic)
if (pj < 0) pj = -pj - 1;
if (pj < nlocal) {
neighptr[n] = j;
n += swidth;
} else
neighptr[n2++] = j;
}
}
int ns = (n - lane) / swidth;
for (int u = pack_offset; u < n2; u++) {
neighptr[n] = neighptr[u];
n += swidth;
}
ilist[i] = i;
cnumneigh[i] = ct + lane;
ns += n2 - pack_offset;
#ifndef OUTER_CHUNK
int edge = (ns % pad_width);
if (edge) {
const int pad_end = ns + (pad_width - edge);
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min=1, max=15, avg=8
#endif
for ( ; ns < pad_end; ns++)
neighptr[ns] = e_nall;
}
#endif
numneigh[i] = ns;
#ifdef OUTER_CHUNK
if (ns > max_chunk) max_chunk = ns;
lane++;
if (lane == swidth) {
ct += max_chunk * swidth;
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
int edge = (ct % alignb);
if (edge) ct += alignb - edge;
neighptr = firstneigh + ct;
max_chunk = 0;
pack_offset = maxnbors * swidth;
lane = 0;
if (ct + obound > list_size) {
if (i < ito - 1) {
*overflow = 1;
ct = (ifrom + tid * 2) * maxnbors;
}
}
}
#else
ct += ns;
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
edge = (ct % alignb);
if (edge) ct += alignb - edge;
neighptr = firstneigh + ct;
if (ct + obound > list_size) {
if (i < ito - 1) {
*overflow = 1;
ct = (ifrom + tid * 2) * maxnbors;
}
}
#endif
}
if (*overflow == 1)
for (int i = ifrom; i < ito; i++)
numneigh[i] = 0;
#ifdef _LMP_INTEL_OFFLOAD
if (separate_buffers) {
#if defined(_OPENMP)
#pragma omp critical
#endif
{
if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin;
if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax;
if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin;
if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax;
}
#pragma omp barrier
}
int ghost_offset = 0, nall_offset = e_nall;
if (separate_buffers) {
int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
if (nghost < 0) nghost = 0;
if (offload) {
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
} else {
ghost_offset = overflow[LMP_GHOST_MIN] - nlocal;
nall_offset = nlocal + nghost;
}
}
#endif
if (molecular) {
for (int i = ifrom; i < ito; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
#ifndef OUTER_CHUNK
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int jj = 0; jj < jnum; jj++) {
#else
const int trip = jnum * swidth;
for (int jj = 0; jj < trip; jj+= swidth) {
#endif
const int j = jlist[jj];
if (need_ic && j < 0) {
which = 0;
jlist[jj] = -j - 1;
} else
ofind_special(which, special, nspecial, i, tag[j]);
#ifdef _LMP_INTEL_OFFLOAD
if (j >= nlocal) {
if (j == e_nall)
jlist[jj] = nall_offset;
else if (which)
jlist[jj] = (j-ghost_offset) ^ (which << SBBITS);
else jlist[jj]-=ghost_offset;
} else
#endif
if (which) jlist[jj] = j ^ (which << SBBITS);
}
}
}
#ifdef _LMP_INTEL_OFFLOAD
else if (separate_buffers) {
for (int i = ifrom; i < ito; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
int jj = 0;
for (jj = 0; jj < jnum; jj++)
if (jlist[jj] >= nlocal) break;
while (jj < jnum) {
if (jlist[jj] == e_nall) jlist[jj] = nall_offset;
else jlist[jj] -= ghost_offset;
jj++;
}
}
}
#endif
} // end omp
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime() - *timer_compute;
#endif
} // end offload
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
_fix->start_watch(TIME_HOST_NEIGHBOR);
for (int n = 0; n < aend; n++) {
ilist[n] = n;
numneigh[n] = 0;
}
} else {
for (int i = astart; i < aend; i++)
list->firstneigh[i] = firstneigh + cnumneigh[i];
if (separate_buffers) {
_fix->start_watch(TIME_PACK);
_fix->set_neighbor_host_sizes();
buffers->pack_sep_from_single(_fix->host_min_local(),
_fix->host_used_local(),
_fix->host_min_ghost(),
_fix->host_used_ghost());
_fix->stop_watch(TIME_PACK);
}
}
#else
for (int i = astart; i < aend; i++)
list->firstneigh[i] = firstneigh + cnumneigh[i];
#endif
}

View File

@ -0,0 +1,51 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(half/bin/newton/intel,
NPairHalfBinNewtonIntel,
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_INTEL)
#else
#ifndef LMP_NPAIR_HALF_BIN_NEWTON_INTEL_H
#define LMP_NPAIR_HALF_BIN_NEWTON_INTEL_H
#include "npair_intel.h"
#include "fix_intel.h"
namespace LAMMPS_NS {
class NPairHalfBinNewtonIntel : public NPairIntel {
public:
NPairHalfBinNewtonIntel(class LAMMPS *);
~NPairHalfBinNewtonIntel() {}
void build(class NeighList *);
private:
template <class flt_t, class acc_t>
void hbni(NeighList *, IntelBuffers<flt_t,acc_t> *);
template <class flt_t, class acc_t, int, int>
void hbni(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, const int,
const int, const int offload_end = 0);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,513 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_half_bin_newton_tri_intel.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "atom.h"
#include "comm.h"
#include "group.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NPairHalfBinNewtonTriIntel::NPairHalfBinNewtonTriIntel(LAMMPS *lmp) :
NPairIntel(lmp) {}
/* ----------------------------------------------------------------------
binned neighbor list construction with Newton's 3rd law for triclinic
each owned atom i checks its own bin and other bins in triclinic stencil
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
void NPairHalfBinNewtonTriIntel::build(NeighList *list)
{
if (nstencil > INTEL_MAX_STENCIL)
error->all(FLERR, "Too many neighbor bins for USER-INTEL package.");
#ifdef _LMP_INTEL_OFFLOAD
if (exclude)
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
#endif
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
hbnti(list, _fix->get_mixed_buffers());
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
hbnti(list, _fix->get_double_buffers());
else
hbnti(list, _fix->get_single_buffers());
_fix->stop_watch(TIME_HOST_NEIGHBOR);
}
template <class flt_t, class acc_t>
void NPairHalfBinNewtonTriIntel::
hbnti(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
list->inum = nlocal;
int host_start = _fix->host_start_neighbor();
const int off_end = _fix->offload_end_neighbor();
#ifdef _LMP_INTEL_OFFLOAD
if (off_end) grow_stencil();
if (_fix->full_host_list()) host_start = 0;
int offload_noghost = _fix->offload_noghost();
#endif
buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end);
int need_ic = 0;
if (atom->molecular)
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
neighbor->cutneighmax);
#ifdef _LMP_INTEL_OFFLOAD
if (need_ic) {
if (offload_noghost) {
hbnti<flt_t,acc_t,1,1>(1, list, buffers, 0, off_end);
hbnti<flt_t,acc_t,1,1>(0, list, buffers, host_start, nlocal, off_end);
} else {
hbnti<flt_t,acc_t,0,1>(1, list, buffers, 0, off_end);
hbnti<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal);
}
} else {
if (offload_noghost) {
hbnti<flt_t,acc_t,1,0>(1, list, buffers, 0, off_end);
hbnti<flt_t,acc_t,1,0>(0, list, buffers, host_start, nlocal, off_end);
} else {
hbnti<flt_t,acc_t,0,0>(1, list, buffers, 0, off_end);
hbnti<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal);
}
}
#else
if (need_ic)
hbnti<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal);
else
hbnti<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal);
#endif
}
template <class flt_t, class acc_t, int offload_noghost, int need_ic>
void NPairHalfBinNewtonTriIntel::
hbnti(const int offload, NeighList *list, IntelBuffers<flt_t,acc_t> *buffers,
const int astart, const int aend, const int offload_end) {
if (aend-astart == 0) return;
const int nall = atom->nlocal + atom->nghost;
int pad = 1;
int nall_t = nall;
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost && offload) nall_t = atom->nlocal;
if (offload) {
if (INTEL_MIC_NBOR_PAD > 1)
pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t);
} else
#endif
if (INTEL_NBOR_PAD > 1)
pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t);
const int pad_width = pad;
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const firstneigh = buffers->firstneigh(list);
const int e_nall = nall_t;
const int molecular = atom->molecular;
int *ns = NULL;
tagint *s = NULL;
int tag_size = 0, special_size;
if (buffers->need_tag()) tag_size = e_nall;
if (molecular) {
s = atom->special[0];
ns = atom->nspecial[0];
special_size = aend;
} else {
s = &buffers->_special_holder;
ns = &buffers->_nspecial_holder;
special_size = 0;
}
const tagint * _noalias const special = s;
const int * _noalias const nspecial = ns;
const int maxspecial = atom->maxspecial;
const tagint * _noalias const tag = atom->tag;
int * _noalias const ilist = list->ilist;
int * _noalias numneigh = list->numneigh;
int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int nstencil = this->nstencil;
const int * _noalias const stencil = this->stencil;
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
const int ntypes = atom->ntypes + 1;
const int nlocal = atom->nlocal;
#ifndef _LMP_INTEL_OFFLOAD
int * const mask = atom->mask;
tagint * const molecule = atom->molecule;
#endif
int tnum;
int *overflow;
double *timer_compute;
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
timer_compute = _fix->off_watch_neighbor();
tnum = buffers->get_off_threads();
overflow = _fix->get_off_overflow_flag();
_fix->stop_watch(TIME_HOST_NEIGHBOR);
_fix->start_watch(TIME_OFFLOAD_LATENCY);
} else
#endif
{
tnum = comm->nthreads;
overflow = _fix->get_overflow_flag();
}
const int nthreads = tnum;
const int maxnbors = buffers->get_max_nbors();
int * _noalias const atombin = buffers->get_atombin();
const int * _noalias const binpacked = buffers->get_binpacked();
const int xperiodic = domain->xperiodic;
const int yperiodic = domain->yperiodic;
const int zperiodic = domain->zperiodic;
const flt_t xprd_half = domain->xprd_half;
const flt_t yprd_half = domain->yprd_half;
const flt_t zprd_half = domain->zprd_half;
#ifdef _LMP_INTEL_OFFLOAD
const int * _noalias const binhead = this->binhead;
const int * _noalias const bins = this->bins;
const int cop = _fix->coprocessor_number();
const int separate_buffers = _fix->separate_buffers();
#pragma offload target(mic:cop) if(offload) \
in(x:length(e_nall+1) alloc_if(0) free_if(0)) \
in(tag:length(tag_size) alloc_if(0) free_if(0)) \
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
out(numneigh:length(0) alloc_if(0) free_if(0)) \
in(ilist:length(0) alloc_if(0) free_if(0)) \
in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(maxnbors,nthreads,maxspecial,nstencil,offload_end,pad_width,e_nall) \
in(offload,separate_buffers, astart, aend, nlocal, molecular, ntypes) \
in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(tag)
#endif
{
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime();
#endif
#ifdef _LMP_INTEL_OFFLOAD
overflow[LMP_LOCAL_MIN] = astart;
overflow[LMP_LOCAL_MAX] = aend - 1;
overflow[LMP_GHOST_MIN] = e_nall;
overflow[LMP_GHOST_MAX] = -1;
#endif
int nstencilp = 0;
int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL];
for (int k = 0; k < nstencil; k++) {
binstart[nstencilp] = stencil[k];
int end = stencil[k] + 1;
for (int kk = k + 1; kk < nstencil; kk++) {
if (stencil[kk-1]+1 == stencil[kk]) {
end++;
k++;
} else break;
}
binend[nstencilp] = end;
nstencilp++;
}
#if defined(_OPENMP)
#pragma omp parallel default(none) \
shared(numneigh, overflow, nstencilp, binstart, binend)
#endif
{
#ifdef _LMP_INTEL_OFFLOAD
int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1;
#endif
const int num = aend - astart;
int tid, ifrom, ito;
IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads);
ifrom += astart;
ito += astart;
int which;
const int list_size = (ito + tid * 2 + 2) * maxnbors;
int ct = (ifrom + tid * 2) * maxnbors;
int *neighptr = firstneigh + ct;
const int obound = maxnbors * 3;
for (int i = ifrom; i < ito; i++) {
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
const int itype = x[i].w;
const int ioffset = ntypes * itype;
// loop over all atoms in bins in stencil
// pairs for atoms j "below" i are excluded
// below = lower z or (equal z and lower y) or (equal zy and lower x)
// (equal zyx and j <= i)
// latter excludes self-self interaction but allows superposed atoms
const int ibin = atombin[i];
int raw_count = maxnbors;
for (int k = 0; k < nstencilp; k++) {
const int bstart = binhead[ibin + binstart[k]];
const int bend = binhead[ibin + binend[k]];
for (int jj = bstart; jj < bend; jj++) {
const int j = binpacked[jj];
#ifdef _LMP_INTEL_OFFLOAD
if (offload_noghost) {
if (j < nlocal) {
if (i < offload_end) continue;
} else if (offload) continue;
}
#endif
if (x[j].z < ztmp) continue;
if (x[j].z == ztmp) {
if (x[j].y < ytmp) continue;
if (x[j].y == ytmp) {
if (x[j].x < xtmp) continue;
if (x[j].x == xtmp && j <= i) continue;
}
}
#ifndef _LMP_INTEL_OFFLOAD
if (exclude) {
const int jtype = x[j].w;
if (exclusion(i,j,itype,jtype,mask,molecule)) continue;
}
#endif
neighptr[raw_count++] = j;
}
}
if (raw_count > obound)
*overflow = 1;
#if defined(LMP_SIMD_COMPILER)
#ifdef _LMP_INTEL_OFFLOAD
int vlmin = lmin, vlmax = lmax, vgmin = gmin, vgmax = gmax;
#if __INTEL_COMPILER+0 > 1499
#pragma vector aligned
#pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin)
#endif
#else
#pragma vector aligned
#pragma simd
#endif
#endif
for (int u = maxnbors; u < raw_count; u++) {
int j = neighptr[u];
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const int jtype = x[j].w;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq > cutneighsq[ioffset + jtype])
neighptr[u] = e_nall;
else {
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
neighptr[u] = -j - 1;
}
#ifdef _LMP_INTEL_OFFLOAD
if (j < nlocal) {
if (j < vlmin) vlmin = j;
if (j > vlmax) vlmax = j;
} else {
if (j < vgmin) vgmin = j;
if (j > vgmax) vgmax = j;
}
#endif
}
}
int n = 0, n2 = maxnbors;
for (int u = maxnbors; u < raw_count; u++) {
const int j = neighptr[u];
int pj = j;
if (pj < e_nall) {
if (need_ic)
if (pj < 0) pj = -pj - 1;
if (pj < nlocal)
neighptr[n++] = j;
else
neighptr[n2++] = j;
}
}
int ns = n;
for (int u = maxnbors; u < n2; u++)
neighptr[n++] = neighptr[u];
ilist[i] = i;
cnumneigh[i] = ct;
ns += n2 - maxnbors;
int edge = (ns % pad_width);
if (edge) {
const int pad_end = ns + (pad_width - edge);
#if defined(LMP_SIMD_COMPILER)
#pragma loop_count min=1, max=15, avg=8
#endif
for ( ; ns < pad_end; ns++)
neighptr[ns] = e_nall;
}
numneigh[i] = ns;
ct += ns;
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
edge = (ct % alignb);
if (edge) ct += alignb - edge;
neighptr = firstneigh + ct;
if (ct + obound > list_size) {
if (i < ito - 1) {
*overflow = 1;
ct = (ifrom + tid * 2) * maxnbors;
}
}
}
if (*overflow == 1)
for (int i = ifrom; i < ito; i++)
numneigh[i] = 0;
#ifdef _LMP_INTEL_OFFLOAD
if (separate_buffers) {
#if defined(_OPENMP)
#pragma omp critical
#endif
{
if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin;
if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax;
if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin;
if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax;
}
#pragma omp barrier
}
int ghost_offset = 0, nall_offset = e_nall;
if (separate_buffers) {
int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
if (nghost < 0) nghost = 0;
if (offload) {
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
} else {
ghost_offset = overflow[LMP_GHOST_MIN] - nlocal;
nall_offset = nlocal + nghost;
}
}
#endif
if (molecular) {
for (int i = ifrom; i < ito; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj];
if (need_ic && j < 0) {
which = 0;
jlist[jj] = -j - 1;
} else
ofind_special(which, special, nspecial, i, tag[j]);
#ifdef _LMP_INTEL_OFFLOAD
if (j >= nlocal) {
if (j == e_nall)
jlist[jj] = nall_offset;
else if (which)
jlist[jj] = (j-ghost_offset) ^ (which << SBBITS);
else jlist[jj]-=ghost_offset;
} else
#endif
if (which) jlist[jj] = j ^ (which << SBBITS);
}
}
}
#ifdef _LMP_INTEL_OFFLOAD
else if (separate_buffers) {
for (int i = ifrom; i < ito; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
int jj = 0;
for (jj = 0; jj < jnum; jj++)
if (jlist[jj] >= nlocal) break;
while (jj < jnum) {
if (jlist[jj] == e_nall) jlist[jj] = nall_offset;
else jlist[jj] -= ghost_offset;
jj++;
}
}
}
#endif
} // end omp
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime() - *timer_compute;
#endif
} // end offload
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
_fix->start_watch(TIME_HOST_NEIGHBOR);
for (int n = 0; n < aend; n++) {
ilist[n] = n;
numneigh[n] = 0;
}
} else {
for (int i = astart; i < aend; i++)
list->firstneigh[i] = firstneigh + cnumneigh[i];
if (separate_buffers) {
_fix->start_watch(TIME_PACK);
_fix->set_neighbor_host_sizes();
buffers->pack_sep_from_single(_fix->host_min_local(),
_fix->host_used_local(),
_fix->host_min_ghost(),
_fix->host_used_ghost());
_fix->stop_watch(TIME_PACK);
}
}
#else
for (int i = astart; i < aend; i++)
list->firstneigh[i] = firstneigh + cnumneigh[i];
#endif
}

View File

@ -0,0 +1,51 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(half/bin/newton/tri/intel,
NPairHalfBinNewtonTriIntel,
NP_HALF | NP_BIN | NP_NEWTON | NP_TRI | NP_INTEL)
#else
#ifndef LMP_NPAIR_HALF_BIN_NEWTON_INTEL_TRI_H
#define LMP_NPAIR_HALF_BIN_NEWTON_INTEL_TRI_H
#include "npair_intel.h"
#include "fix_intel.h"
namespace LAMMPS_NS {
class NPairHalfBinNewtonTriIntel : public NPairIntel {
public:
NPairHalfBinNewtonTriIntel(class LAMMPS *);
~NPairHalfBinNewtonTriIntel() {}
void build(class NeighList *);
private:
template <class flt_t, class acc_t>
void hbnti(NeighList *, IntelBuffers<flt_t,acc_t> *);
template <class flt_t, class acc_t, int, int>
void hbnti(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, const int,
const int, const int offload_end = 0);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,69 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_intel.h"
#include "nstencil.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NPairIntel::NPairIntel(LAMMPS *lmp) : NPair(lmp) {
int ifix = modify->find_fix("package_intel");
if (ifix < 0)
error->all(FLERR,
"The 'package intel' command is required for /intel styles");
_fix = static_cast<FixIntel *>(modify->fix[ifix]);
#ifdef _LMP_INTEL_OFFLOAD
_cop = _fix->coprocessor_number();
_off_map_stencil = 0;
#endif
}
/* ---------------------------------------------------------------------- */
NPairIntel::~NPairIntel() {
#ifdef _LMP_INTEL_OFFLOAD
if (_off_map_stencil) {
const int * stencil = this->stencil;
#pragma offload_transfer target(mic:_cop) \
nocopy(stencil:alloc_if(0) free_if(1))
}
#endif
}
/* ----------------------------------------------------------------------
copy needed info from NStencil class to this build class
------------------------------------------------------------------------- */
#ifdef _LMP_INTEL_OFFLOAD
void NPairIntel::grow_stencil()
{
if (_off_map_stencil != stencil) {
if (_off_map_stencil) {
const int * stencil = _off_map_stencil;
#pragma offload_transfer target(mic:_cop) \
nocopy(stencil:alloc_if(0) free_if(1))
}
_off_map_stencil = stencil;
const int * stencil = _off_map_stencil;
const int maxstencil = ns->get_maxstencil();
#pragma offload_transfer target(mic:_cop) \
in(stencil:length(maxstencil) alloc_if(1) free_if(0))
}
}
#endif

View File

@ -0,0 +1,117 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_NPAIR_INTEL_H
#define LMP_NPAIR_INTEL_H
#include "npair.h"
#include "fix_intel.h"
#if defined(_OPENMP)
#include <omp.h>
#endif
#ifdef LMP_USE_AVXCD
#include "intel_simd.h"
#endif
#ifdef OUTER_CHUNK
#include "intel_simd.h"
#endif
#ifdef _LMP_INTEL_OFFLOAD
#pragma offload_attribute(push,target(mic))
#endif
#define ofind_special(which, special, nspecial, i, tag) \
{ \
which = 0; \
const int n1 = nspecial[i * 3]; \
const int n2 = nspecial[i * 3 + 1]; \
const int n3 = nspecial[i * 3 + 2]; \
const tagint *sptr = special + i * maxspecial; \
for (int s = 0; s < n3; s++) { \
if (sptr[s] == tag) { \
if (s < n1) { \
which = 1; \
} else if (s < n2) { \
which = 2; \
} else { \
which = 3; \
} \
} \
} \
}
#define ominimum_image_check(answer, dx, dy, dz) \
{ \
answer = 0; \
if (xperiodic && fabs(dx) > xprd_half) answer = 1; \
if (yperiodic && fabs(dy) > yprd_half) answer = 1; \
if (zperiodic && fabs(dz) > zprd_half) answer = 1; \
}
#define dminimum_image_check(answer, dx, dy, dz) \
{ \
answer = 0; \
if (domain->xperiodic && fabs(dx) > domain->xprd_half) answer = 1; \
if (domain->yperiodic && fabs(dy) > domain->yprd_half) answer = 1; \
if (domain->zperiodic && fabs(dz) > domain->zprd_half) answer = 1; \
}
#ifdef _LMP_INTEL_OFFLOAD
#pragma offload_attribute(pop)
#endif
namespace LAMMPS_NS {
class NPairIntel : public NPair {
public:
NPairIntel(class LAMMPS *);
~NPairIntel();
#ifdef _LMP_INTEL_OFFLOAD
void grow_stencil();
#endif
protected:
FixIntel *_fix;
#ifdef _LMP_INTEL_OFFLOAD
int _cop;
int *_off_map_stencil;
#endif
};
}
#endif
/* ERROR/WARNING messages:
E: Exclusion lists not yet supported for Intel offload
Self explanatory.
E: The 'package intel' command is required for /intel styles
Self explanatory.
E: Too many neighbor bins for USER-INTEL package.
The number of bins used in the stencil to check for neighboring atoms is too
high for the Intel package. Either increase the bin size in the input script
or recompile with a larger setting for INTEL_MAX_STENCIL in intel_preprocess.h.
*/

View File

@ -1,621 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "neighbor.h"
#include "neighbor_omp.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "comm.h"
#include "domain.h"
#include "group.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ----------------------------------------------------------------------
N^2 search for all neighbors
every neighbor pair appears in list of both atoms i and j
------------------------------------------------------------------------- */
void Neighbor::full_nsq_omp(NeighList *list)
{
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,n,itype,jtype,which,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int nall = atom->nlocal + atom->nghost;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over owned atoms, storing neighbors
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms, owned and ghost
// skip i = j
for (j = 0; j < nall; j++) {
if (includegroup && !(mask[j] & bitmask)) continue;
if (i == j) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
list->gnum = 0;
}
/* ----------------------------------------------------------------------
N^2 search for all neighbors
include neighbors of ghost atoms, but no "special neighbors" for ghosts
every neighbor pair appears in list of both atoms i and j
------------------------------------------------------------------------- */
void Neighbor::full_nsq_ghost_omp(NeighList *list)
{
const int nlocal = atom->nlocal;
const int nall = nlocal + atom->nghost;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nall);
int i,j,n,itype,jtype,which,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over owned & ghost atoms, storing neighbors
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms, owned and ghost
// skip i = j
// no molecular test when i = ghost atom
if (i < nlocal) {
for (j = 0; j < nall; j++) {
if (i == j) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
} else {
for (j = 0; j < nall; j++) {
if (i == j) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
list->gnum = nall - nlocal;
}
/* ----------------------------------------------------------------------
binned neighbor list construction for all neighbors
every neighbor pair appears in list of both atoms i and j
------------------------------------------------------------------------- */
void Neighbor::full_bin_omp(NeighList *list)
{
// bin owned & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int nstencil = list->nstencil;
int *stencil = list->stencil;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over owned atoms, storing neighbors
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in surrounding bins in stencil including self
// skip i = j
ibin = coord2bin(x[i]);
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (i == j) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
list->gnum = 0;
}
/* ----------------------------------------------------------------------
binned neighbor list construction for all neighbors
include neighbors of ghost atoms, but no "special neighbors" for ghosts
every neighbor pair appears in list of both atoms i and j
------------------------------------------------------------------------- */
void Neighbor::full_bin_ghost_omp(NeighList *list)
{
// bin owned & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = atom->nlocal;
const int nall = nlocal + atom->nghost;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nall);
int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int xbin,ybin,zbin,xbin2,ybin2,zbin2;
int *neighptr;
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int nstencil = list->nstencil;
int *stencil = list->stencil;
int **stencilxyz = list->stencilxyz;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over owned & ghost atoms, storing neighbors
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in surrounding bins in stencil including self
// when i is a ghost atom, must check if stencil bin is out of bounds
// skip i = j
// no molecular test when i = ghost atom
if (i < nlocal) {
ibin = coord2bin(x[i]);
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (i == j) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
} else {
ibin = coord2bin(x[i],xbin,ybin,zbin);
for (k = 0; k < nstencil; k++) {
xbin2 = xbin + stencilxyz[k][0];
ybin2 = ybin + stencilxyz[k][1];
zbin2 = zbin + stencilxyz[k][2];
if (xbin2 < 0 || xbin2 >= mbinx ||
ybin2 < 0 || ybin2 >= mbiny ||
zbin2 < 0 || zbin2 >= mbinz) continue;
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (i == j) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
list->gnum = nall - nlocal;
}
/* ----------------------------------------------------------------------
binned neighbor list construction for all neighbors
multi-type stencil is itype dependent and is distance checked
every neighbor pair appears in list of both atoms i and j
------------------------------------------------------------------------- */
void Neighbor::full_multi_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr,*s;
double *cutsq,*distsq;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int *nstencil_multi = list->nstencil_multi;
int **stencil_multi = list->stencil_multi;
double **distsq_multi = list->distsq_multi;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in other bins in stencil, including self
// skip if i,j neighbor cutoff is less than bin distance
// skip i = j
ibin = coord2bin(x[i]);
s = stencil_multi[itype];
distsq = distsq_multi[itype];
cutsq = cutneighsq[itype];
ns = nstencil_multi[itype];
for (k = 0; k < ns; k++) {
for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
jtype = type[j];
if (cutsq[jtype] < distsq[k]) continue;
if (i == j) continue;
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
list->gnum = 0;
}

View File

@ -1,618 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <string.h>
#include "neighbor.h"
#include "neighbor_omp.h"
#include "neigh_list.h"
#include "atom.h"
#include "comm.h"
#include "group.h"
#include "fix_shear_history.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ----------------------------------------------------------------------
granular particles
N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
shear history must be accounted for when a neighbor pair is added
pair added to list if atoms i and j are both owned and i < j
pair added if j is ghost (also stored by proc owning j)
------------------------------------------------------------------------- */
void Neighbor::granular_nsq_no_newton_omp(NeighList *list)
{
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
FixShearHistory * const fix_history = list->fix_history;
NeighList * listgranhistory = list->listgranhistory;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list,listgranhistory)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,m,n,nn,dnum,dnumbytes;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
double radi,radsum,cutsq;
int *neighptr,*touchptr;
double *shearptr;
int *npartner;
tagint **partner;
double **shearpartner;
int **firsttouch;
double **firstshear;
MyPage<int> *ipage_touch;
MyPage<double> *dpage_shear;
double **x = atom->x;
double *radius = atom->radius;
tagint *tag = atom->tag;
int *type = atom->type;
int *mask = atom->mask;
tagint *molecule = atom->molecule;
int nall = atom->nlocal + atom->nghost;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
if (fix_history) {
npartner = fix_history->npartner;
partner = fix_history->partner;
shearpartner = fix_history->shearpartner;
firsttouch = listgranhistory->firstneigh;
firstshear = listgranhistory->firstdouble;
ipage_touch = listgranhistory->ipage+tid;
dpage_shear = listgranhistory->dpage+tid;
dnum = listgranhistory->dnum;
dnumbytes = dnum * sizeof(double);
ipage_touch->reset();
dpage_shear->reset();
}
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
if (fix_history) {
nn = 0;
touchptr = ipage_touch->vget();
shearptr = dpage_shear->vget();
}
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
radi = radius[i];
// loop over remaining atoms, owned and ghost
for (j = i+1; j < nall; j++) {
if (includegroup && !(mask[j] & bitmask)) continue;
if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
radsum = radi + radius[j];
cutsq = (radsum+skin) * (radsum+skin);
if (rsq <= cutsq) {
neighptr[n] = j;
if (fix_history) {
if (rsq < radsum*radsum) {
for (m = 0; m < npartner[i]; m++)
if (partner[i][m] == tag[j]) break;
if (m < npartner[i]) {
touchptr[n] = 1;
memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
nn += dnum;
} else {
touchptr[n] = 0;
memcpy(&shearptr[nn],zeroes,dnumbytes);
nn += dnum;
}
} else {
touchptr[n] = 0;
memcpy(&shearptr[nn],zeroes,dnumbytes);
nn += dnum;
}
}
n++;
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
if (fix_history) {
firsttouch[i] = touchptr;
firstshear[i] = shearptr;
ipage_touch->vgot(n);
dpage_shear->vgot(nn);
}
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}
/* ----------------------------------------------------------------------
granular particles
N^2 / 2 search for neighbor pairs with full Newton's 3rd law
no shear history is allowed for this option
pair added to list if atoms i and j are both owned and i < j
if j is ghost only me or other proc adds pair
decision based on itag,jtag tests
------------------------------------------------------------------------- */
void Neighbor::granular_nsq_newton_omp(NeighList *list)
{
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,n,itag,jtag;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
double radi,radsum,cutsq;
int *neighptr;
double **x = atom->x;
double *radius = atom->radius;
tagint *tag = atom->tag;
int *type = atom->type;
int *mask = atom->mask;
tagint *molecule = atom->molecule;
int nall = atom->nlocal + atom->nghost;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itag = tag[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
radi = radius[i];
// loop over remaining atoms, owned and ghost
for (j = i+1; j < nall; j++) {
if (includegroup && !(mask[j] & bitmask)) continue;
if (j >= nlocal) {
jtag = tag[j];
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (x[j][2] < ztmp) continue;
if (x[j][2] == ztmp) {
if (x[j][1] < ytmp) continue;
if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
}
}
}
if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
radsum = radi + radius[j];
cutsq = (radsum+skin) * (radsum+skin);
if (rsq <= cutsq) neighptr[n++] = j;
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}
/* ----------------------------------------------------------------------
granular particles
binned neighbor list construction with partial Newton's 3rd law
shear history must be accounted for when a neighbor pair is added
each owned atom i checks own bin and surrounding bins in non-Newton stencil
pair stored once if i,j are both owned and i < j
pair stored by me if j is ghost (also stored by proc owning j)
------------------------------------------------------------------------- */
void Neighbor::granular_bin_no_newton_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
FixShearHistory * const fix_history = list->fix_history;
NeighList * listgranhistory = list->listgranhistory;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list,listgranhistory)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,m,n,nn,ibin,dnum,dnumbytes;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
double radi,radsum,cutsq;
int *neighptr,*touchptr;
double *shearptr;
MyPage<int> *ipage_touch;
MyPage<double> *dpage_shear;
int *npartner;
tagint **partner;
double **shearpartner;
int **firsttouch;
double **firstshear;
// loop over each atom, storing neighbors
double **x = atom->x;
double *radius = atom->radius;
tagint *tag = atom->tag;
int *type = atom->type;
int *mask = atom->mask;
tagint *molecule = atom->molecule;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int nstencil = list->nstencil;
int *stencil = list->stencil;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
if (fix_history) {
npartner = fix_history->npartner;
partner = fix_history->partner;
shearpartner = fix_history->shearpartner;
firsttouch = listgranhistory->firstneigh;
firstshear = listgranhistory->firstdouble;
ipage_touch = listgranhistory->ipage+tid;
dpage_shear = listgranhistory->dpage+tid;
dnum = listgranhistory->dnum;
dnumbytes = dnum * sizeof(double);
ipage_touch->reset();
dpage_shear->reset();
}
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
if (fix_history) {
nn = 0;
touchptr = ipage_touch->vget();
shearptr = dpage_shear->vget();
}
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
radi = radius[i];
ibin = coord2bin(x[i]);
// loop over all atoms in surrounding bins in stencil including self
// only store pair if i < j
// stores own/own pairs only once
// stores own/ghost pairs on both procs
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (j <= i) continue;
if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
radsum = radi + radius[j];
cutsq = (radsum+skin) * (radsum+skin);
if (rsq <= cutsq) {
neighptr[n] = j;
if (fix_history) {
if (rsq < radsum*radsum) {
for (m = 0; m < npartner[i]; m++)
if (partner[i][m] == tag[j]) break;
if (m < npartner[i]) {
touchptr[n] = 1;
memcpy(&shearptr[nn],&shearpartner[i][dnum*m],dnumbytes);
nn += dnum;
} else {
touchptr[n] = 0;
memcpy(&shearptr[nn],zeroes,dnumbytes);
nn += dnum;
}
} else {
touchptr[n] = 0;
memcpy(&shearptr[nn],zeroes,dnumbytes);
nn += dnum;
}
}
n++;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
if (fix_history) {
firsttouch[i] = touchptr;
firstshear[i] = shearptr;
ipage_touch->vgot(n);
dpage_shear->vgot(nn);
}
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}
/* ----------------------------------------------------------------------
granular particles
binned neighbor list construction with full Newton's 3rd law
no shear history is allowed for this option
each owned atom i checks its own bin and other bins in Newton stencil
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
void Neighbor::granular_bin_newton_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,ibin;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
double radi,radsum,cutsq;
int *neighptr;
// loop over each atom, storing neighbors
double **x = atom->x;
double *radius = atom->radius;
int *type = atom->type;
int *mask = atom->mask;
tagint *molecule = atom->molecule;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int nstencil = list->nstencil;
int *stencil = list->stencil;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
radi = radius[i];
// loop over rest of atoms in i's bin, ghosts are at end of linked list
// if j is owned atom, store it, since j is beyond i in linked list
// if j is ghost, only store if j coords are "above and to the right" of i
for (j = bins[i]; j >= 0; j = bins[j]) {
if (j >= nlocal) {
if (x[j][2] < ztmp) continue;
if (x[j][2] == ztmp) {
if (x[j][1] < ytmp) continue;
if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
}
}
if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
radsum = radi + radius[j];
cutsq = (radsum+skin) * (radsum+skin);
if (rsq <= cutsq) neighptr[n++] = j;
}
// loop over all atoms in other bins in stencil, store every pair
ibin = coord2bin(x[i]);
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
radsum = radi + radius[j];
cutsq = (radsum+skin) * (radsum+skin);
if (rsq <= cutsq) neighptr[n++] = j;
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}
/* ----------------------------------------------------------------------
granular particles
binned neighbor list construction with Newton's 3rd law for triclinic
no shear history is allowed for this option
each owned atom i checks its own bin and other bins in triclinic stencil
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
void Neighbor::granular_bin_newton_tri_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,ibin;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
double radi,radsum,cutsq;
int *neighptr;
// loop over each atom, storing neighbors
double **x = atom->x;
double *radius = atom->radius;
int *type = atom->type;
int *mask = atom->mask;
tagint *molecule = atom->molecule;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int nstencil = list->nstencil;
int *stencil = list->stencil;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
radi = radius[i];
// loop over all atoms in bins in stencil
// pairs for atoms j "below" i are excluded
// below = lower z or (equal z and lower y) or (equal zy and lower x)
// (equal zyx and j <= i)
// latter excludes self-self interaction but allows superposed atoms
ibin = coord2bin(x[i]);
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (x[j][2] < ztmp) continue;
if (x[j][2] == ztmp) {
if (x[j][1] < ytmp) continue;
if (x[j][1] == ytmp) {
if (x[j][0] < xtmp) continue;
if (x[j][0] == xtmp && j <= i) continue;
}
}
if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
radsum = radi + radius[j];
cutsq = (radsum+skin) * (radsum+skin);
if (rsq <= cutsq) neighptr[n++] = j;
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}

View File

@ -1,559 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "neighbor.h"
#include "neighbor_omp.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "comm.h"
#include "domain.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ----------------------------------------------------------------------
binned neighbor list construction with partial Newton's 3rd law
each owned atom i checks own bin and other bins in stencil
pair stored once if i,j are both owned and i < j
pair stored by me if j is ghost (also stored by proc owning j)
------------------------------------------------------------------------- */
void Neighbor::half_bin_no_newton_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int nstencil = list->nstencil;
int *stencil = list->stencil;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in other bins in stencil including self
// only store pair if i < j
// stores own/own pairs only once
// stores own/ghost pairs on both procs
ibin = coord2bin(x[i]);
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (j <= i) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}
/* ----------------------------------------------------------------------
binned neighbor list construction with partial Newton's 3rd law
include neighbors of ghost atoms, but no "special neighbors" for ghosts
owned and ghost atoms check own bin and other bins in stencil
pair stored once if i,j are both owned and i < j
pair stored by me if i owned and j ghost (also stored by proc owning j)
pair stored once if i,j are both ghost and i < j
------------------------------------------------------------------------- */
void Neighbor::half_bin_no_newton_ghost_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = atom->nlocal;
const int nall = nlocal + atom->nghost;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nall);
int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
tagint tagprev;
int xbin,ybin,zbin,xbin2,ybin2,zbin2;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int nstencil = list->nstencil;
int *stencil = list->stencil;
int **stencilxyz = list->stencilxyz;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in other bins in stencil including self
// when i is a ghost atom, must check if stencil bin is out of bounds
// only store pair if i < j
// stores own/own pairs only once
// stores own/ghost pairs with owned atom only, on both procs
// stores ghost/ghost pairs only once
// no molecular test when i = ghost atom
if (i < nlocal) {
ibin = coord2bin(x[i]);
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (j <= i) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
} else {
ibin = coord2bin(x[i],xbin,ybin,zbin);
for (k = 0; k < nstencil; k++) {
xbin2 = xbin + stencilxyz[k][0];
ybin2 = ybin + stencilxyz[k][1];
zbin2 = zbin + stencilxyz[k][2];
if (xbin2 < 0 || xbin2 >= mbinx ||
ybin2 < 0 || ybin2 >= mbiny ||
zbin2 < 0 || zbin2 >= mbinz) continue;
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (j <= i) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
list->gnum = nall - atom->nlocal;
}
/* ----------------------------------------------------------------------
binned neighbor list construction with full Newton's 3rd law
each owned atom i checks its own bin and other bins in Newton stencil
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
void Neighbor::half_bin_newton_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int nstencil = list->nstencil;
int *stencil = list->stencil;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over rest of atoms in i's bin, ghosts are at end of linked list
// if j is owned atom, store it, since j is beyond i in linked list
// if j is ghost, only store if j coords are "above and to the right" of i
for (j = bins[i]; j >= 0; j = bins[j]) {
if (j >= nlocal) {
if (x[j][2] < ztmp) continue;
if (x[j][2] == ztmp) {
if (x[j][1] < ytmp) continue;
if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
}
}
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
// OLD: if (which >= 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
// loop over all atoms in other bins in stencil, store every pair
ibin = coord2bin(x[i]);
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
// OLD: if (which >= 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}
/* ----------------------------------------------------------------------
binned neighbor list construction with Newton's 3rd law for triclinic
each owned atom i checks its own bin and other bins in triclinic stencil
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
void Neighbor::half_bin_newton_tri_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int nstencil = list->nstencil;
int *stencil = list->stencil;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in bins in stencil
// pairs for atoms j "below" i are excluded
// below = lower z or (equal z and lower y) or (equal zy and lower x)
// (equal zyx and j <= i)
// latter excludes self-self interaction but allows superposed atoms
ibin = coord2bin(x[i]);
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (x[j][2] < ztmp) continue;
if (x[j][2] == ztmp) {
if (x[j][1] < ytmp) continue;
if (x[j][1] == ytmp) {
if (x[j][0] < xtmp) continue;
if (x[j][0] == xtmp && j <= i) continue;
}
}
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}

View File

@ -1,435 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "neighbor.h"
#include "neighbor_omp.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "comm.h"
#include "domain.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ----------------------------------------------------------------------
binned neighbor list construction with partial Newton's 3rd law
each owned atom i checks own bin and other bins in stencil
multi-type stencil is itype dependent and is distance checked
pair stored once if i,j are both owned and i < j
pair stored by me if j is ghost (also stored by proc owning j)
------------------------------------------------------------------------- */
void Neighbor::half_multi_no_newton_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr,*s;
double *cutsq,*distsq;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int *nstencil_multi = list->nstencil_multi;
int **stencil_multi = list->stencil_multi;
double **distsq_multi = list->distsq_multi;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in other bins in stencil including self
// only store pair if i < j
// skip if i,j neighbor cutoff is less than bin distance
// stores own/own pairs only once
// stores own/ghost pairs on both procs
ibin = coord2bin(x[i]);
s = stencil_multi[itype];
distsq = distsq_multi[itype];
cutsq = cutneighsq[itype];
ns = nstencil_multi[itype];
for (k = 0; k < ns; k++) {
for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
if (j <= i) continue;
jtype = type[j];
if (cutsq[jtype] < distsq[k]) continue;
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}
/* ----------------------------------------------------------------------
binned neighbor list construction with full Newton's 3rd law
each owned atom i checks its own bin and other bins in Newton stencil
multi-type stencil is itype dependent and is distance checked
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
void Neighbor::half_multi_newton_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr,*s;
double *cutsq,*distsq;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int *nstencil_multi = list->nstencil_multi;
int **stencil_multi = list->stencil_multi;
double **distsq_multi = list->distsq_multi;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over rest of atoms in i's bin, ghosts are at end of linked list
// if j is owned atom, store it, since j is beyond i in linked list
// if j is ghost, only store if j coords are "above and to the right" of i
for (j = bins[i]; j >= 0; j = bins[j]) {
if (j >= nlocal) {
if (x[j][2] < ztmp) continue;
if (x[j][2] == ztmp) {
if (x[j][1] < ytmp) continue;
if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
}
}
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
// loop over all atoms in other bins in stencil, store every pair
// skip if i,j neighbor cutoff is less than bin distance
ibin = coord2bin(x[i]);
s = stencil_multi[itype];
distsq = distsq_multi[itype];
cutsq = cutneighsq[itype];
ns = nstencil_multi[itype];
for (k = 0; k < ns; k++) {
for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
jtype = type[j];
if (cutsq[jtype] < distsq[k]) continue;
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}
/* ----------------------------------------------------------------------
binned neighbor list construction with Newton's 3rd law for triclinic
each owned atom i checks its own bin and other bins in triclinic stencil
multi-type stencil is itype dependent and is distance checked
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
void Neighbor::half_multi_newton_tri_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr,*s;
double *cutsq,*distsq;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int *nstencil_multi = list->nstencil_multi;
int **stencil_multi = list->stencil_multi;
double **distsq_multi = list->distsq_multi;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in bins, including self, in stencil
// skip if i,j neighbor cutoff is less than bin distance
// bins below self are excluded from stencil
// pairs for atoms j "below" i are excluded
// below = lower z or (equal z and lower y) or (equal zy and lower x)
// (equal zyx and j <= i)
// latter excludes self-self interaction but allows superposed atoms
ibin = coord2bin(x[i]);
s = stencil_multi[itype];
distsq = distsq_multi[itype];
cutsq = cutneighsq[itype];
ns = nstencil_multi[itype];
for (k = 0; k < ns; k++) {
for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
jtype = type[j];
if (cutsq[jtype] < distsq[k]) continue;
if (x[j][2] < ztmp) continue;
if (x[j][2] == ztmp) {
if (x[j][1] < ytmp) continue;
if (x[j][1] == ytmp) {
if (x[j][0] < xtmp) continue;
if (x[j][0] == xtmp && j <= i) continue;
}
}
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}

View File

@ -1,376 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "neighbor.h"
#include "neighbor_omp.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "comm.h"
#include "domain.h"
#include "group.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ----------------------------------------------------------------------
N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
pair stored once if i,j are both owned and i < j
pair stored by me if j is ghost (also stored by proc owning j)
------------------------------------------------------------------------- */
void Neighbor::half_nsq_no_newton_omp(NeighList *list)
{
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
const int nall = atom->nlocal + atom->nghost;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,n,itype,jtype,which,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over owned atoms, storing neighbors
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over remaining atoms, owned and ghost
// only store pair if i < j
for (j = i+1; j < nall; j++) {
if (includegroup && !(mask[j] & bitmask)) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}
/* ----------------------------------------------------------------------
N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
include neighbors of ghost atoms, but no "special neighbors" for ghosts
pair stored once if i,j are both owned and i < j
pair stored by me if i owned and j ghost (also stored by proc owning j)
pair stored once if i,j are both ghost and i < j
------------------------------------------------------------------------- */
void Neighbor::half_nsq_no_newton_ghost_omp(NeighList *list)
{
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
const int nall = nlocal + atom->nghost;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nall);
int i,j,n,itype,jtype,which,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over owned & ghost atoms, storing neighbors
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over remaining atoms, owned and ghost
// only store pair if i < j
// stores own/own pairs only once
// stores own/ghost pairs with owned atom only, on both procs
// stores ghost/ghost pairs only once
// no molecular test when i = ghost atom
if (i < nlocal) {
for (j = i+1; j < nall; j++) {
if (includegroup && !(mask[j] & bitmask)) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
} else {
for (j = i+1; j < nall; j++) {
if (includegroup && !(mask[j] & bitmask)) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = atom->nlocal;
list->gnum = nall - atom->nlocal;
}
/* ----------------------------------------------------------------------
N^2 / 2 search for neighbor pairs with full Newton's 3rd law
every pair stored exactly once by some processor
decision on ghost atoms based on itag,jtag tests
------------------------------------------------------------------------- */
void Neighbor::half_nsq_newton_omp(NeighList *list)
{
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,n,itype,jtype,itag,jtag,which,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int nall = atom->nlocal + atom->nghost;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itag = tag[i];
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over remaining atoms, owned and ghost
// itag = jtag is possible for long cutoffs that include images of self
for (j = i+1; j < nall; j++) {
if (includegroup && !(mask[j] & bitmask)) continue;
if (j >= nlocal) {
jtag = tag[j];
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (x[j][2] < ztmp) continue;
if (x[j][2] == ztmp) {
if (x[j][1] < ytmp) continue;
if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
}
}
}
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
}

View File

@ -1,972 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "neighbor.h"
#include "neighbor_omp.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "comm.h"
#include "domain.h"
#include "group.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ----------------------------------------------------------------------
multiple respa lists
N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
pair added to list if atoms i and j are both owned and i < j
pair added if j is ghost (also stored by proc owning j)
------------------------------------------------------------------------- */
void Neighbor::respa_nsq_no_newton_omp(NeighList *list)
{
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
NeighList *listinner = list->listinner;
NeighList *listmiddle = list->listmiddle;
const int respamiddle = list->respamiddle;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list,listinner,listmiddle)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,n,itype,jtype,n_inner,n_middle,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr,*neighptr_inner,*neighptr_middle;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int nall = atom->nlocal + atom->nghost;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int *ilist_inner = listinner->ilist;
int *numneigh_inner = listinner->numneigh;
int **firstneigh_inner = listinner->firstneigh;
int *ilist_middle,*numneigh_middle,**firstneigh_middle;
if (respamiddle) {
ilist_middle = listmiddle->ilist;
numneigh_middle = listmiddle->numneigh;
firstneigh_middle = listmiddle->firstneigh;
}
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
MyPage<int> &ipage_inner = listinner->ipage[tid];
ipage.reset();
ipage_inner.reset();
MyPage<int> *ipage_middle;
if (respamiddle) {
ipage_middle = listmiddle->ipage + tid;
ipage_middle->reset();
}
int which = 0;
int minchange = 0;
for (i = ifrom; i < ito; i++) {
n = n_inner = 0;
neighptr = ipage.vget();
neighptr_inner = ipage_inner.vget();
if (respamiddle) {
n_middle = 0;
neighptr_middle = ipage_middle->vget();
}
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over remaining atoms, owned and ghost
for (j = i+1; j < nall; j++) {
if (includegroup && !(mask[j] & bitmask)) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
if (rsq < cut_inner_sq) {
if (which == 0) neighptr_inner[n_inner++] = j;
else if (minchange) neighptr_inner[n_inner++] = j;
else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS);
}
if (respamiddle && rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
if (which == 0) neighptr_middle[n_middle++] = j;
else if (minchange) neighptr_middle[n_middle++] = j;
else if (which > 0)
neighptr_middle[n_middle++] = j ^ (which << SBBITS);
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
ilist_inner[i] = i;
firstneigh_inner[i] = neighptr_inner;
numneigh_inner[i] = n_inner;
ipage.vgot(n_inner);
if (ipage_inner.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
if (respamiddle) {
ilist_middle[i] = i;
firstneigh_middle[i] = neighptr_middle;
numneigh_middle[i] = n_middle;
ipage_middle->vgot(n_middle);
if (ipage_middle->status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
listinner->inum = nlocal;
if (respamiddle) listmiddle->inum = nlocal;
}
/* ----------------------------------------------------------------------
multiple respa lists
N^2 / 2 search for neighbor pairs with full Newton's 3rd law
pair added to list if atoms i and j are both owned and i < j
if j is ghost only me or other proc adds pair
decision based on itag,jtag tests
------------------------------------------------------------------------- */
void Neighbor::respa_nsq_newton_omp(NeighList *list)
{
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
NeighList *listinner = list->listinner;
NeighList *listmiddle = list->listmiddle;
const int respamiddle = list->respamiddle;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list,listinner,listmiddle)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,n,itype,jtype,itag,jtag,n_inner,n_middle,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr,*neighptr_inner,*neighptr_middle;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int nall = atom->nlocal + atom->nghost;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int *ilist_inner = listinner->ilist;
int *numneigh_inner = listinner->numneigh;
int **firstneigh_inner = listinner->firstneigh;
int *ilist_middle,*numneigh_middle,**firstneigh_middle;
if (respamiddle) {
ilist_middle = listmiddle->ilist;
numneigh_middle = listmiddle->numneigh;
firstneigh_middle = listmiddle->firstneigh;
}
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
MyPage<int> &ipage_inner = listinner->ipage[tid];
ipage.reset();
ipage_inner.reset();
MyPage<int> *ipage_middle;
if (respamiddle) {
ipage_middle = listmiddle->ipage + tid;
ipage_middle->reset();
}
int which = 0;
int minchange = 0;
for (i = ifrom; i < ito; i++) {
n = n_inner = 0;
neighptr = ipage.vget();
neighptr_inner = ipage_inner.vget();
if (respamiddle) {
n_middle = 0;
neighptr_middle = ipage_middle->vget();
}
itag = tag[i];
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over remaining atoms, owned and ghost
for (j = i+1; j < nall; j++) {
if (includegroup && !(mask[j] & bitmask)) continue;
if (j >= nlocal) {
jtag = tag[j];
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (x[j][2] < ztmp) continue;
if (x[j][2] == ztmp) {
if (x[j][1] < ytmp) continue;
if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
}
}
}
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
if (rsq < cut_inner_sq) {
if (which == 0) neighptr_inner[n_inner++] = j;
else if (minchange) neighptr_inner[n_inner++] = j;
else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS);
}
if (respamiddle &&
rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
if (which == 0) neighptr_middle[n_middle++] = j;
else if (minchange) neighptr_middle[n_middle++] = j;
else if (which > 0)
neighptr_middle[n_middle++] = j ^ (which << SBBITS);
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
ilist_inner[i] = i;
firstneigh_inner[i] = neighptr_inner;
numneigh_inner[i] = n_inner;
ipage.vgot(n_inner);
if (ipage_inner.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
if (respamiddle) {
ilist_middle[i] = i;
firstneigh_middle[i] = neighptr_middle;
numneigh_middle[i] = n_middle;
ipage_middle->vgot(n_middle);
if (ipage_middle->status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
listinner->inum = nlocal;
if (respamiddle) listmiddle->inum = nlocal;
}
/* ----------------------------------------------------------------------
multiple respa lists
binned neighbor list construction with partial Newton's 3rd law
each owned atom i checks own bin and surrounding bins in non-Newton stencil
pair stored once if i,j are both owned and i < j
pair stored by me if j is ghost (also stored by proc owning j)
------------------------------------------------------------------------- */
void Neighbor::respa_bin_no_newton_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
NeighList *listinner = list->listinner;
NeighList *listmiddle = list->listmiddle;
const int respamiddle = list->respamiddle;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list,listinner,listmiddle)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr,*neighptr_inner,*neighptr_middle;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int nstencil = list->nstencil;
int *stencil = list->stencil;
int *ilist_inner = listinner->ilist;
int *numneigh_inner = listinner->numneigh;
int **firstneigh_inner = listinner->firstneigh;
int *ilist_middle,*numneigh_middle,**firstneigh_middle;
if (respamiddle) {
ilist_middle = listmiddle->ilist;
numneigh_middle = listmiddle->numneigh;
firstneigh_middle = listmiddle->firstneigh;
}
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
MyPage<int> &ipage_inner = listinner->ipage[tid];
ipage.reset();
ipage_inner.reset();
MyPage<int> *ipage_middle;
if (respamiddle) {
ipage_middle = listmiddle->ipage + tid;
ipage_middle->reset();
}
int which = 0;
int minchange = 0;
for (i = ifrom; i < ito; i++) {
n = n_inner = 0;
neighptr = ipage.vget();
neighptr_inner = ipage_inner.vget();
if (respamiddle) {
n_middle = 0;
neighptr_middle = ipage_middle->vget();
}
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
ibin = coord2bin(x[i]);
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in surrounding bins in stencil including self
// only store pair if i < j
// stores own/own pairs only once
// stores own/ghost pairs on both procs
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (j <= i) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
if (rsq < cut_inner_sq) {
if (which == 0) neighptr_inner[n_inner++] = j;
else if (minchange) neighptr_inner[n_inner++] = j;
else if (which > 0)
neighptr_inner[n_inner++] = j ^ (which << SBBITS);
}
if (respamiddle &&
rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
if (which == 0) neighptr_middle[n_middle++] = j;
else if (minchange) neighptr_middle[n_middle++] = j;
else if (which > 0)
neighptr_middle[n_middle++] = j ^ (which << SBBITS);
}
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
ilist_inner[i] = i;
firstneigh_inner[i] = neighptr_inner;
numneigh_inner[i] = n_inner;
ipage.vgot(n_inner);
if (ipage_inner.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
if (respamiddle) {
ilist_middle[i] = i;
firstneigh_middle[i] = neighptr_middle;
numneigh_middle[i] = n_middle;
ipage_middle->vgot(n_middle);
if (ipage_middle->status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
listinner->inum = nlocal;
if (respamiddle) listmiddle->inum = nlocal;
}
/* ----------------------------------------------------------------------
multiple respa lists
binned neighbor list construction with full Newton's 3rd law
each owned atom i checks its own bin and other bins in Newton stencil
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
void Neighbor::respa_bin_newton_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
NeighList *listinner = list->listinner;
NeighList *listmiddle = list->listmiddle;
const int respamiddle = list->respamiddle;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list,listinner,listmiddle)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr,*neighptr_inner,*neighptr_middle;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int nstencil = list->nstencil;
int *stencil = list->stencil;
int *ilist_inner = listinner->ilist;
int *numneigh_inner = listinner->numneigh;
int **firstneigh_inner = listinner->firstneigh;
int *ilist_middle,*numneigh_middle,**firstneigh_middle;
if (respamiddle) {
ilist_middle = listmiddle->ilist;
numneigh_middle = listmiddle->numneigh;
firstneigh_middle = listmiddle->firstneigh;
}
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
MyPage<int> &ipage_inner = listinner->ipage[tid];
ipage.reset();
ipage_inner.reset();
MyPage<int> *ipage_middle;
if (respamiddle) {
ipage_middle = listmiddle->ipage + tid;
ipage_middle->reset();
}
int which = 0;
int minchange = 0;
for (i = ifrom; i < ito; i++) {
n = n_inner = 0;
neighptr = ipage.vget();
neighptr_inner = ipage_inner.vget();
if (respamiddle) {
n_middle = 0;
neighptr_middle = ipage_middle->vget();
}
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over rest of atoms in i's bin, ghosts are at end of linked list
// if j is owned atom, store it, since j is beyond i in linked list
// if j is ghost, only store if j coords are "above and to the right" of i
for (j = bins[i]; j >= 0; j = bins[j]) {
if (j >= nlocal) {
if (x[j][2] < ztmp) continue;
if (x[j][2] == ztmp) {
if (x[j][1] < ytmp) continue;
if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
}
}
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
if (rsq < cut_inner_sq) {
if (which == 0) neighptr_inner[n_inner++] = j;
else if (minchange) neighptr_inner[n_inner++] = j;
else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS);
}
if (respamiddle &&
rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
if (which == 0) neighptr_middle[n_middle++] = j;
else if (minchange) neighptr_middle[n_middle++] = j;
else if (which > 0)
neighptr_middle[n_middle++] = j ^ (which << SBBITS);
}
}
}
// loop over all atoms in other bins in stencil, store every pair
ibin = coord2bin(x[i]);
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
if (rsq < cut_inner_sq) {
if (which == 0) neighptr_inner[n_inner++] = j;
else if (minchange) neighptr_inner[n_inner++] = j;
else if (which > 0)
neighptr_inner[n_inner++] = j ^ (which << SBBITS);
}
if (respamiddle &&
rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
if (which == 0) neighptr_middle[n_middle++] = j;
else if (minchange) neighptr_middle[n_middle++] = j;
else if (which > 0)
neighptr_middle[n_middle++] = j ^ (which << SBBITS);
}
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
ilist_inner[i] = i;
firstneigh_inner[i] = neighptr_inner;
numneigh_inner[i] = n_inner;
ipage.vgot(n_inner);
if (ipage_inner.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
if (respamiddle) {
ilist_middle[i] = i;
firstneigh_middle[i] = neighptr_middle;
numneigh_middle[i] = n_middle;
ipage_middle->vgot(n_middle);
if (ipage_middle->status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
listinner->inum = nlocal;
if (respamiddle) listmiddle->inum = nlocal;
}
/* ----------------------------------------------------------------------
multiple respa lists
binned neighbor list construction with Newton's 3rd law for triclinic
each owned atom i checks its own bin and other bins in triclinic stencil
every pair stored exactly once by some processor
------------------------------------------------------------------------- */
void Neighbor::respa_bin_newton_tri_omp(NeighList *list)
{
// bin local & ghost atoms
if (binatomflag) bin_atoms();
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NEIGH_OMP_INIT;
NeighList *listinner = list->listinner;
NeighList *listmiddle = list->listmiddle;
const int respamiddle = list->respamiddle;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list,listinner,listmiddle)
#endif
NEIGH_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr,*neighptr_inner,*neighptr_middle;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
int nstencil = list->nstencil;
int *stencil = list->stencil;
int *ilist_inner = listinner->ilist;
int *numneigh_inner = listinner->numneigh;
int **firstneigh_inner = listinner->firstneigh;
int *ilist_middle,*numneigh_middle,**firstneigh_middle;
if (respamiddle) {
ilist_middle = listmiddle->ilist;
numneigh_middle = listmiddle->numneigh;
firstneigh_middle = listmiddle->firstneigh;
}
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
MyPage<int> &ipage_inner = listinner->ipage[tid];
ipage.reset();
ipage_inner.reset();
MyPage<int> *ipage_middle;
if (respamiddle) {
ipage_middle = listmiddle->ipage + tid;
ipage_middle->reset();
}
int which = 0;
int minchange = 0;
for (i = ifrom; i < ito; i++) {
n = n_inner = 0;
neighptr = ipage.vget();
neighptr_inner = ipage_inner.vget();
if (respamiddle) {
n_middle = 0;
neighptr_middle = ipage_middle->vget();
}
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in bins in stencil
// pairs for atoms j "below" i are excluded
// below = lower z or (equal z and lower y) or (equal zy and lower x)
// (equal zyx and j <= i)
// latter excludes self-self interaction but allows superposed atoms
ibin = coord2bin(x[i]);
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (x[j][2] < ztmp) continue;
if (x[j][2] == ztmp) {
if (x[j][1] < ytmp) continue;
if (x[j][1] == ytmp) {
if (x[j][0] < xtmp) continue;
if (x[j][0] == xtmp && j <= i) continue;
}
}
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
if (rsq < cut_inner_sq) {
if (which == 0) neighptr_inner[n_inner++] = j;
else if (minchange) neighptr_inner[n_inner++] = j;
else if (which > 0)
neighptr_inner[n_inner++] = j ^ (which << SBBITS);
}
if (respamiddle &&
rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
if (which == 0) neighptr_middle[n_middle++] = j;
else if (minchange) neighptr_middle[n_middle++] = j;
else if (which > 0)
neighptr_middle[n_middle++] = j ^ (which << SBBITS);
}
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
ilist_inner[i] = i;
firstneigh_inner[i] = neighptr_inner;
numneigh_inner[i] = n_inner;
ipage.vgot(n_inner);
if (ipage_inner.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
if (respamiddle) {
ilist_middle[i] = i;
firstneigh_middle[i] = neighptr_middle;
numneigh_middle[i] = n_middle;
ipage_middle->vgot(n_middle);
if (ipage_middle->status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
}
NEIGH_OMP_CLOSE;
list->inum = nlocal;
listinner->inum = nlocal;
if (respamiddle) listmiddle->inum = nlocal;
}

View File

@ -0,0 +1,166 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "npair_full_bin_ghost_omp.h"
#include "npair_omp.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "domain.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
using namespace NeighConst;
/* ---------------------------------------------------------------------- */
NPairFullBinGhostOmp::NPairFullBinGhostOmp(LAMMPS *lmp) : NPair(lmp) {}
/* ----------------------------------------------------------------------
binned neighbor list construction for all neighbors
include neighbors of ghost atoms, but no "special neighbors" for ghosts
every neighbor pair appears in list of both atoms i and j
------------------------------------------------------------------------- */
void NPairFullBinGhostOmp::build(NeighList *list)
{
const int nlocal = atom->nlocal;
const int nall = nlocal + atom->nghost;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NPAIR_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NPAIR_OMP_SETUP(nall);
int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int xbin,ybin,zbin,xbin2,ybin2,zbin2;
int *neighptr;
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over owned & ghost atoms, storing neighbors
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in surrounding bins in stencil including self
// when i is a ghost atom, must check if stencil bin is out of bounds
// skip i = j
// no molecular test when i = ghost atom
if (i < nlocal) {
ibin = coord2bin(x[i]);
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (i == j) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
} else {
ibin = coord2bin(x[i],xbin,ybin,zbin);
for (k = 0; k < nstencil; k++) {
xbin2 = xbin + stencilxyz[k][0];
ybin2 = ybin + stencilxyz[k][1];
zbin2 = zbin + stencilxyz[k][2];
if (xbin2 < 0 || xbin2 >= mbinx ||
ybin2 < 0 || ybin2 >= mbiny ||
zbin2 < 0 || zbin2 >= mbinz) continue;
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (i == j) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NPAIR_OMP_CLOSE;
list->inum = nlocal;
list->gnum = nall - nlocal;
}

View File

@ -0,0 +1,44 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(full/bin/ghost/omp,
NPairFullBinGhostOmp,
NP_FULL | NP_BIN | NP_GHOST | NP_OMP | NP_NEWTON | NP_NEWTOFF |
NP_ORTHO | NP_TRI)
#else
#ifndef LMP_NPAIR_FULL_BIN_GHOST_OMP_H
#define LMP_NPAIR_FULL_BIN_GHOST_OMP_H
#include "npair.h"
namespace LAMMPS_NS {
class NPairFullBinGhostOmp : public NPair {
public:
NPairFullBinGhostOmp(class LAMMPS *);
~NPairFullBinGhostOmp() {}
void build(class NeighList *);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,135 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "npair_full_bin_omp.h"
#include "npair_omp.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "domain.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
using namespace NeighConst;
/* ---------------------------------------------------------------------- */
NPairFullBinOmp::NPairFullBinOmp(LAMMPS *lmp) : NPair(lmp) {}
/* ----------------------------------------------------------------------
binned neighbor list construction for all neighbors
every neighbor pair appears in list of both atoms i and j
------------------------------------------------------------------------- */
void NPairFullBinOmp::build(NeighList *list)
{
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NPAIR_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NPAIR_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr;
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
// loop over owned atoms, storing neighbors
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in surrounding bins in stencil including self
// skip i = j
ibin = coord2bin(x[i]);
for (k = 0; k < nstencil; k++) {
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
if (i == j) continue;
jtype = type[j];
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NPAIR_OMP_CLOSE;
list->inum = nlocal;
list->gnum = 0;
}

View File

@ -0,0 +1,44 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(full/bin/omp,
NPairFullBinOmp,
NP_FULL | NP_BIN | NP_OMP | NP_NEWTON | NP_NEWTOFF |
NP_ORTHO | NP_TRI)
#else
#ifndef LMP_NPAIR_FULL_BIN_OMP_H
#define LMP_NPAIR_FULL_BIN_OMP_H
#include "npair.h"
namespace LAMMPS_NS {
class NPairFullBinOmp : public NPair {
public:
NPairFullBinOmp(class LAMMPS *);
~NPairFullBinOmp() {}
void build(class NeighList *);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -0,0 +1,143 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "npair_full_multi_omp.h"
#include "npair_omp.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "domain.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
using namespace NeighConst;
/* ---------------------------------------------------------------------- */
NPairFullMultiOmp::NPairFullMultiOmp(LAMMPS *lmp) : NPair(lmp) {}
/* ----------------------------------------------------------------------
binned neighbor list construction for all neighbors
multi-type stencil is itype dependent and is distance checked
every neighbor pair appears in list of both atoms i and j
------------------------------------------------------------------------- */
void NPairFullMultiOmp::build(NeighList *list)
{
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
const int molecular = atom->molecular;
const int moltemplate = (molecular == 2) ? 1 : 0;
NPAIR_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
NPAIR_OMP_SETUP(nlocal);
int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom;
tagint tagprev;
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
int *neighptr,*s;
double *cutsq,*distsq;
// loop over each atom, storing neighbors
double **x = atom->x;
int *type = atom->type;
int *mask = atom->mask;
tagint *tag = atom->tag;
tagint *molecule = atom->molecule;
tagint **special = atom->special;
int **nspecial = atom->nspecial;
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int *ilist = list->ilist;
int *numneigh = list->numneigh;
int **firstneigh = list->firstneigh;
// each thread has its own page allocator
MyPage<int> &ipage = list->ipage[tid];
ipage.reset();
for (i = ifrom; i < ito; i++) {
n = 0;
neighptr = ipage.vget();
itype = type[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
if (moltemplate) {
imol = molindex[i];
iatom = molatom[i];
tagprev = tag[i] - iatom - 1;
}
// loop over all atoms in other bins in stencil, including self
// skip if i,j neighbor cutoff is less than bin distance
// skip i = j
ibin = coord2bin(x[i]);
s = stencil_multi[itype];
distsq = distsq_multi[itype];
cutsq = cutneighsq[itype];
ns = nstencil_multi[itype];
for (k = 0; k < ns; k++) {
for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
jtype = type[j];
if (cutsq[jtype] < distsq[k]) continue;
if (i == j) continue;
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq[itype][jtype]) {
if (molecular) {
if (!moltemplate)
which = find_special(special[i],nspecial[i],tag[j]);
else if (imol >=0)
which = find_special(onemols[imol]->special[iatom],
onemols[imol]->nspecial[iatom],
tag[j]-tagprev);
else which = 0;
if (which == 0) neighptr[n++] = j;
else if (domain->minimum_image_check(delx,dely,delz))
neighptr[n++] = j;
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
} else neighptr[n++] = j;
}
}
}
ilist[i] = i;
firstneigh[i] = neighptr;
numneigh[i] = n;
ipage.vgot(n);
if (ipage.status())
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
}
NPAIR_OMP_CLOSE;
list->inum = nlocal;
list->gnum = 0;
}

View File

@ -0,0 +1,44 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(full/multi/omp,
NPairFullMultiOmp,
NP_FULL | NP_MULTI | NP_OMP |
NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
#else
#ifndef LMP_NPAIR_FULL_MULTI_OMP_H
#define LMP_NPAIR_FULL_MULTI_OMP_H
#include "npair.h"
namespace LAMMPS_NS {
class NPairFullMultiOmp : public NPair {
public:
NPairFullMultiOmp(class LAMMPS *);
~NPairFullMultiOmp() {}
void build(class NeighList *);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

Some files were not shown because too many files have changed in this diff Show More