Merge pull request #185 from akohlmey/new-neighbor
New neighbor list code with updates for USER-OMP and USER-DPD
This commit is contained in:
2
src/.gitignore
vendored
2
src/.gitignore
vendored
@ -18,6 +18,8 @@
|
||||
/*_tally.cpp
|
||||
/*_rx.h
|
||||
/*_rx.cpp
|
||||
/*_ssa.h
|
||||
/*_ssa.cpp
|
||||
|
||||
/kokkos.cpp
|
||||
/kokkos.h
|
||||
|
||||
@ -105,11 +105,16 @@ action modify_kokkos.cpp
|
||||
action modify_kokkos.h
|
||||
action neigh_bond_kokkos.cpp
|
||||
action neigh_bond_kokkos.h
|
||||
action neigh_full_kokkos.h
|
||||
action neigh_list_kokkos.cpp
|
||||
action neigh_list_kokkos.h
|
||||
action neighbor_kokkos.cpp
|
||||
action neighbor_kokkos.h
|
||||
action npair_copy_kokkos.cpp
|
||||
action npair_copy_kokkos.h
|
||||
action npair_kokkos.cpp
|
||||
action npair_kokkos.h
|
||||
action nbin_kokkos.cpp
|
||||
action nbin_kokkos.h
|
||||
action math_special_kokkos.cpp
|
||||
action math_special_kokkos.h
|
||||
action pair_buck_coul_cut_kokkos.cpp
|
||||
|
||||
@ -125,12 +125,10 @@ void FixQEqReaxKokkos<DeviceType>::init()
|
||||
neighbor->requests[irequest]->pair = 0;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else { //if (neighflag == HALF || neighflag == HALFTHREAD)
|
||||
neighbor->requests[irequest]->fix = 1;
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -168,7 +168,6 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
else
|
||||
neighflag = HALF;
|
||||
} else if (strcmp(arg[iarg+1],"n2") == 0) neighflag = N2;
|
||||
else if (strcmp(arg[iarg+1],"full/cluster") == 0) neighflag = FULLCLUSTER;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"binsize") == 0) {
|
||||
@ -232,20 +231,6 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
called by Finish
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int KokkosLMP::neigh_list_kokkos(int m)
|
||||
{
|
||||
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
|
||||
if (nk->lists_host[m] && nk->lists_host[m]->d_numneigh.dimension_0())
|
||||
return 1;
|
||||
if (nk->lists_device[m] && nk->lists_device[m]->d_numneigh.dimension_0())
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
called by Finish
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int KokkosLMP::neigh_count(int m)
|
||||
{
|
||||
int inum;
|
||||
@ -255,28 +240,30 @@ int KokkosLMP::neigh_count(int m)
|
||||
ArrayTypes<LMPHostType>::t_int_1d h_numneigh;
|
||||
|
||||
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
|
||||
if (nk->lists_host[m]) {
|
||||
inum = nk->lists_host[m]->inum;
|
||||
if (nk->lists[m]->execution_space == Host) {
|
||||
NeighListKokkos<LMPHostType>* nlistKK = (NeighListKokkos<LMPHostType>*) nk->lists[m];
|
||||
inum = nlistKK->inum;
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_ilist = Kokkos::create_mirror_view(nk->lists_host[m]->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nk->lists_host[m]->d_numneigh);
|
||||
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
|
||||
#else
|
||||
h_ilist = nk->lists_host[m]->d_ilist;
|
||||
h_numneigh = nk->lists_host[m]->d_numneigh;
|
||||
h_ilist = nlistKK->d_ilist;
|
||||
h_numneigh = nlistKK->d_numneigh;
|
||||
#endif
|
||||
Kokkos::deep_copy(h_ilist,nk->lists_host[m]->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nk->lists_host[m]->d_numneigh);
|
||||
} else if (nk->lists_device[m]) {
|
||||
inum = nk->lists_device[m]->inum;
|
||||
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
|
||||
} else if (nk->lists[m]->execution_space == Device) {
|
||||
NeighListKokkos<LMPDeviceType>* nlistKK = (NeighListKokkos<LMPDeviceType>*) nk->lists[m];
|
||||
inum = nlistKK->inum;
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_ilist = Kokkos::create_mirror_view(nk->lists_device[m]->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nk->lists_device[m]->d_numneigh);
|
||||
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
|
||||
#else
|
||||
h_ilist = nk->lists_device[m]->d_ilist;
|
||||
h_numneigh = nk->lists_device[m]->d_numneigh;
|
||||
h_ilist = nlistKK->d_ilist;
|
||||
h_numneigh = nlistKK->d_numneigh;
|
||||
#endif
|
||||
Kokkos::deep_copy(h_ilist,nk->lists_device[m]->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nk->lists_device[m]->d_numneigh);
|
||||
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
|
||||
}
|
||||
|
||||
for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]];
|
||||
|
||||
@ -34,7 +34,6 @@ class KokkosLMP : protected Pointers {
|
||||
KokkosLMP(class LAMMPS *, int, char **);
|
||||
~KokkosLMP();
|
||||
void accelerator(int, char **);
|
||||
int neigh_list_kokkos(int);
|
||||
int neigh_count(int);
|
||||
private:
|
||||
static void my_signal_handler(int);
|
||||
|
||||
145
src/KOKKOS/nbin_kokkos.cpp
Normal file
145
src/KOKKOS/nbin_kokkos.cpp
Normal file
@ -0,0 +1,145 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "nbin_kokkos.h"
|
||||
#include "neighbor.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "group.h"
|
||||
#include "domain.h"
|
||||
#include "comm.h"
|
||||
#include "update.h"
|
||||
#include "error.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{NSQ,BIN,MULTI}; // also in Neighbor
|
||||
|
||||
#define SMALL 1.0e-6
|
||||
#define CUT2BIN_RATIO 100
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
NBinKokkos<DeviceType>::NBinKokkos(LAMMPS *lmp) : NBinStandard(lmp) {
|
||||
atoms_per_bin = 16;
|
||||
|
||||
d_resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_resize = Kokkos::create_mirror_view(d_resize);
|
||||
#else
|
||||
h_resize = d_resize;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup neighbor binning geometry
|
||||
bin numbering in each dimension is global:
|
||||
0 = 0.0 to binsize, 1 = binsize to 2*binsize, etc
|
||||
nbin-1,nbin,etc = bbox-binsize to bbox, bbox to bbox+binsize, etc
|
||||
-1,-2,etc = -binsize to 0.0, -2*binsize to -binsize, etc
|
||||
code will work for any binsize
|
||||
since next(xyz) and stencil extend as far as necessary
|
||||
binsize = 1/2 of cutoff is roughly optimal
|
||||
for orthogonal boxes:
|
||||
a dim must be filled exactly by integer # of bins
|
||||
in periodic, procs on both sides of PBC must see same bin boundary
|
||||
in non-periodic, coord2bin() still assumes this by use of nbin xyz
|
||||
for triclinic boxes:
|
||||
tilted simulation box cannot contain integer # of bins
|
||||
stencil & neigh list built differently to account for this
|
||||
mbinlo = lowest global bin any of my ghost atoms could fall into
|
||||
mbinhi = highest global bin any of my ghost atoms could fall into
|
||||
mbin = number of bins I need in a dimension
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NBinKokkos<DeviceType>::bin_atoms_setup(int nall)
|
||||
{
|
||||
if (mbins > k_bins.d_view.dimension_0()) {
|
||||
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",mbins,atoms_per_bin);
|
||||
bins = k_bins.view<DeviceType>();
|
||||
|
||||
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins);
|
||||
bincount = k_bincount.view<DeviceType>();
|
||||
last_bin_memory = update->ntimestep;
|
||||
}
|
||||
|
||||
last_bin = update->ntimestep;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
bin owned and ghost atoms
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NBinKokkos<DeviceType>::bin_atoms()
|
||||
{
|
||||
h_resize() = 1;
|
||||
|
||||
while(h_resize() > 0) {
|
||||
h_resize() = 0;
|
||||
deep_copy(d_resize, h_resize);
|
||||
|
||||
MemsetZeroFunctor<DeviceType> f_zero;
|
||||
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
|
||||
Kokkos::parallel_for(mbins, f_zero);
|
||||
DeviceType::fence();
|
||||
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
|
||||
bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2];
|
||||
bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2];
|
||||
|
||||
NPairKokkosBinAtomsFunctor<DeviceType> f(*this);
|
||||
|
||||
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
|
||||
DeviceType::fence();
|
||||
|
||||
deep_copy(h_resize, d_resize);
|
||||
if(h_resize()) {
|
||||
|
||||
atoms_per_bin += 16;
|
||||
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
|
||||
bins = k_bins.view<DeviceType>();
|
||||
c_bins = bins;
|
||||
last_bin_memory = update->ntimestep;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void NBinKokkos<DeviceType>::binatomsItem(const int &i) const
|
||||
{
|
||||
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
|
||||
|
||||
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
|
||||
if(ac < bins.dimension_1()) {
|
||||
bins(ibin, ac) = i;
|
||||
} else {
|
||||
d_resize() = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Explicit template instantiations of NBinKokkos.
// The LMPDeviceType instantiation is always emitted; the LMPHostType one is
// emitted only when KOKKOS_HAVE_CUDA is defined.
// NOTE(review): presumably LMPHostType and LMPDeviceType are the same type
// without CUDA, which would make a second instantiation a duplicate - confirm
// against kokkos_type.h.
namespace LAMMPS_NS {
|
||||
template class NBinKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class NBinKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
153
src/KOKKOS/nbin_kokkos.h
Normal file
153
src/KOKKOS/nbin_kokkos.h
Normal file
@ -0,0 +1,153 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NBIN_CLASS
|
||||
|
||||
NBinStyle(kk/host,
|
||||
NBinKokkos<LMPHostType>,
|
||||
NB_KOKKOS_HOST)
|
||||
|
||||
NBinStyle(kk/device,
|
||||
NBinKokkos<LMPDeviceType>,
|
||||
NB_KOKKOS_DEVICE)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NBIN_KOKKOS_H
|
||||
#define LMP_NBIN_KOKKOS_H
|
||||
|
||||
#include "nbin_standard.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class NBinKokkos : public NBinStandard {
|
||||
public:
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
NBinKokkos(class LAMMPS *);
|
||||
~NBinKokkos() {}
|
||||
void bin_atoms_setup(int);
|
||||
void bin_atoms();
|
||||
|
||||
int atoms_per_bin;
|
||||
DAT::tdual_int_1d k_bincount;
|
||||
DAT::tdual_int_2d k_bins;
|
||||
|
||||
typename AT::t_int_1d bincount;
|
||||
const typename AT::t_int_1d_const c_bincount;
|
||||
typename AT::t_int_2d bins;
|
||||
typename AT::t_int_2d_const c_bins;
|
||||
typename AT::t_int_scalar d_resize;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
|
||||
typename AT::t_x_array_randomread x;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void binatomsItem(const int &i) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi_[0])
|
||||
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo_[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi_[1])
|
||||
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo_[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi_[2])
|
||||
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo_[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi_[0])
|
||||
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo_[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi_[1])
|
||||
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo_[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi_[2])
|
||||
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo_[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
|
||||
|
||||
i[0] = ix - mbinxlo;
|
||||
i[1] = iy - mbinylo;
|
||||
i[2] = iz - mbinzlo;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
private:
|
||||
double bboxlo_[3],bboxhi_[3];
|
||||
};
|
||||
|
||||
template<class DeviceType>
|
||||
struct NPairKokkosBinAtomsFunctor {
|
||||
typedef DeviceType device_type;
|
||||
|
||||
const NBinKokkos<DeviceType> c;
|
||||
|
||||
NPairKokkosBinAtomsFunctor(const NBinKokkos<DeviceType> &_c):
|
||||
c(_c) {};
|
||||
~NPairKokkosBinAtomsFunctor() {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.binatomsItem(i);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
@ -34,9 +34,8 @@ void NeighListKokkos<Device>::clean_copy()
|
||||
|
||||
ipage = NULL;
|
||||
dpage = NULL;
|
||||
maxstencil = 0;
|
||||
ghostflag = 0;
|
||||
maxstencil_multi = 0;
|
||||
|
||||
maxatoms = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -70,49 +69,6 @@ void NeighListKokkos<Device>::grow(int nmax)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device>
|
||||
void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (style == BIN) {
|
||||
if (smax > maxstencil) {
|
||||
maxstencil = smax;
|
||||
d_stencil =
|
||||
memory->create_kokkos(d_stencil,h_stencil,stencil,maxstencil,
|
||||
"neighlist:stencil");
|
||||
if (ghostflag) {
|
||||
memory->create_kokkos(d_stencilxyz,h_stencilxyz,stencilxyz,maxstencil,
|
||||
3,"neighlist:stencilxyz");
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
int n = atom->ntypes;
|
||||
if (maxstencil_multi == 0) {
|
||||
nstencil_multi = new int[n+1];
|
||||
stencil_multi = new int*[n+1];
|
||||
distsq_multi = new double*[n+1];
|
||||
for (i = 1; i <= n; i++) {
|
||||
nstencil_multi[i] = 0;
|
||||
stencil_multi[i] = NULL;
|
||||
distsq_multi[i] = NULL;
|
||||
}
|
||||
}
|
||||
if (smax > maxstencil_multi) {
|
||||
maxstencil_multi = smax;
|
||||
for (i = 1; i <= n; i++) {
|
||||
memory->destroy(stencil_multi[i]);
|
||||
memory->destroy(distsq_multi[i]);
|
||||
memory->create(stencil_multi[i],maxstencil_multi,
|
||||
"neighlist:stencil_multi");
|
||||
memory->create(distsq_multi[i],maxstencil_multi,
|
||||
"neighlist:distsq_multi");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class NeighListKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u,FULLCLUSTER=16u};
|
||||
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u};
|
||||
|
||||
class AtomNeighbors
|
||||
{
|
||||
@ -74,14 +74,12 @@ public:
|
||||
typename DAT::tdual_int_1d k_ilist; // local indices of I atoms
|
||||
typename ArrayTypes<Device>::t_int_1d d_ilist;
|
||||
typename ArrayTypes<Device>::t_int_1d d_numneigh; // # of J neighs for each I
|
||||
typename ArrayTypes<Device>::t_int_1d d_stencil; // # of J neighs for each I
|
||||
typename ArrayTypes<LMPHostType>::t_int_1d h_stencil; // # of J neighs per I
|
||||
typename ArrayTypes<Device>::t_int_1d_3 d_stencilxyz;
|
||||
typename ArrayTypes<LMPHostType>::t_int_1d_3 h_stencilxyz;
|
||||
|
||||
NeighListKokkos(class LAMMPS *lmp):
|
||||
NeighList(lmp) {_stride = 1; maxneighs = 16;};
|
||||
~NeighListKokkos() {stencil = NULL; numneigh = NULL; ilist = NULL;};
|
||||
NeighList(lmp) {_stride = 1; maxneighs = 16; kokkos = 1; maxatoms = 0;
|
||||
execution_space = ExecutionSpaceFromDevice<Device>::space;
|
||||
};
|
||||
~NeighListKokkos() {numneigh = NULL; ilist = NULL;};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AtomNeighbors get_neighbors(const int &i) const {
|
||||
@ -99,7 +97,8 @@ public:
|
||||
int& num_neighs(const int & i) const {
|
||||
return d_numneigh(i);
|
||||
}
|
||||
void stencil_allocate(int smax, int style);
|
||||
private:
|
||||
int maxatoms;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
;/* ----------------------------------------------------------------------
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
@ -26,6 +26,10 @@
|
||||
#include "angle.h"
|
||||
#include "dihedral.h"
|
||||
#include "improper.h"
|
||||
#include "style_nbin.h"
|
||||
#include "style_nstencil.h"
|
||||
#include "style_npair.h"
|
||||
#include "style_ntopo.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
@ -36,18 +40,11 @@ enum{NSQ,BIN,MULTI}; // also in neigh_list.cpp
|
||||
NeighborKokkos::NeighborKokkos(LAMMPS *lmp) : Neighbor(lmp),
|
||||
neighbond_host(lmp),neighbond_device(lmp)
|
||||
{
|
||||
atoms_per_bin = 16;
|
||||
|
||||
nlist_host = 0;
|
||||
lists_host = NULL;
|
||||
pair_build_host = NULL;
|
||||
stencil_create_host = NULL;
|
||||
nlist_device = 0;
|
||||
lists_device = NULL;
|
||||
pair_build_device = NULL;
|
||||
stencil_create_device = NULL;
|
||||
|
||||
device_flag = 0;
|
||||
bondlist = NULL;
|
||||
anglelist = NULL;
|
||||
dihedrallist = NULL;
|
||||
improperlist = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -58,14 +55,6 @@ NeighborKokkos::~NeighborKokkos()
|
||||
memory->destroy_kokkos(k_cutneighsq,cutneighsq);
|
||||
cutneighsq = NULL;
|
||||
|
||||
for (int i = 0; i < nlist_host; i++) delete lists_host[i];
|
||||
delete [] lists_host;
|
||||
for (int i = 0; i < nlist_device; i++) delete lists_device[i];
|
||||
delete [] lists_device;
|
||||
|
||||
delete [] pair_build_device;
|
||||
delete [] pair_build_host;
|
||||
|
||||
memory->destroy_kokkos(k_ex_type,ex_type);
|
||||
memory->destroy_kokkos(k_ex1_type,ex1_type);
|
||||
memory->destroy_kokkos(k_ex2_type,ex2_type);
|
||||
@ -89,6 +78,11 @@ void NeighborKokkos::init()
|
||||
{
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
Neighbor::init();
|
||||
|
||||
// 1st time allocation of xhold
|
||||
|
||||
if (dist_check)
|
||||
xhold = DAT::tdual_x_array("neigh:xhold",maxhold);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -101,158 +95,16 @@ void NeighborKokkos::init_cutneighsq_kokkos(int n)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int NeighborKokkos::init_lists_kokkos()
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nlist_host; i++) delete lists_host[i];
|
||||
delete [] lists_host;
|
||||
delete [] pair_build_host;
|
||||
delete [] stencil_create_host;
|
||||
nlist_host = 0;
|
||||
|
||||
for (i = 0; i < nlist_device; i++) delete lists_device[i];
|
||||
delete [] lists_device;
|
||||
delete [] pair_build_device;
|
||||
delete [] stencil_create_device;
|
||||
nlist_device = 0;
|
||||
|
||||
nlist = 0;
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
if (requests[i]->kokkos_device) nlist_device++;
|
||||
else if (requests[i]->kokkos_host) nlist_host++;
|
||||
else nlist++;
|
||||
}
|
||||
|
||||
lists_host = new NeighListKokkos<LMPHostType>*[nrequest];
|
||||
pair_build_host = new PairPtrHost[nrequest];
|
||||
stencil_create_host = new StencilPtrHost[nrequest];
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
lists_host[i] = NULL;
|
||||
pair_build_host[i] = NULL;
|
||||
stencil_create_host[i] = NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
if (!requests[i]->kokkos_host) continue;
|
||||
lists_host[i] = new NeighListKokkos<LMPHostType>(lmp);
|
||||
lists_host[i]->index = i;
|
||||
lists_host[i]->dnum = requests[i]->dnum;
|
||||
if (requests[i]->pair) {
|
||||
Pair *pair = (Pair *) requests[i]->requestor;
|
||||
pair->init_list(requests[i]->id,lists_host[i]);
|
||||
}
|
||||
if (requests[i]->fix) {
|
||||
Fix *fix = (Fix *) requests[i]->requestor;
|
||||
fix->init_list(requests[i]->id,lists_host[i]);
|
||||
}
|
||||
}
|
||||
|
||||
lists_device = new NeighListKokkos<LMPDeviceType>*[nrequest];
|
||||
pair_build_device = new PairPtrDevice[nrequest];
|
||||
stencil_create_device = new StencilPtrDevice[nrequest];
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
lists_device[i] = NULL;
|
||||
pair_build_device[i] = NULL;
|
||||
stencil_create_device[i] = NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
if (!requests[i]->kokkos_device) continue;
|
||||
lists_device[i] = new NeighListKokkos<LMPDeviceType>(lmp);
|
||||
lists_device[i]->index = i;
|
||||
lists_device[i]->dnum = requests[i]->dnum;
|
||||
if (requests[i]->pair) {
|
||||
Pair *pair = (Pair *) requests[i]->requestor;
|
||||
pair->init_list(requests[i]->id,lists_device[i]);
|
||||
}
|
||||
if (requests[i]->fix) {
|
||||
Fix *fix = (Fix *) requests[i]->requestor;
|
||||
fix->init_list(requests[i]->id,lists_device[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// 1st time allocation of xhold
|
||||
|
||||
if (dist_check)
|
||||
xhold = DAT::tdual_x_array("neigh:xhold",maxhold);
|
||||
|
||||
// return # of non-Kokkos lists
|
||||
|
||||
return nlist;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_list_flags1_kokkos(int i)
|
||||
void NeighborKokkos::create_kokkos_list(int i)
|
||||
{
|
||||
if (style != BIN)
|
||||
error->all(FLERR,"KOKKOS package only supports 'bin' neighbor lists");
|
||||
|
||||
if (lists_host[i]) {
|
||||
lists_host[i]->buildflag = 1;
|
||||
if (pair_build_host[i] == NULL) lists_host[i]->buildflag = 0;
|
||||
if (requests[i]->occasional) lists_host[i]->buildflag = 0;
|
||||
|
||||
lists_host[i]->growflag = 1;
|
||||
if (requests[i]->copy) lists_host[i]->growflag = 0;
|
||||
|
||||
lists_host[i]->stencilflag = 1;
|
||||
if (style == NSQ) lists_host[i]->stencilflag = 0;
|
||||
if (stencil_create[i] == NULL) lists_host[i]->stencilflag = 0;
|
||||
|
||||
lists_host[i]->ghostflag = 0;
|
||||
if (requests[i]->ghost) lists_host[i]->ghostflag = 1;
|
||||
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
|
||||
}
|
||||
|
||||
if (lists_device[i]) {
|
||||
lists_device[i]->buildflag = 1;
|
||||
if (pair_build_device[i] == NULL) lists_device[i]->buildflag = 0;
|
||||
if (requests[i]->occasional) lists_device[i]->buildflag = 0;
|
||||
|
||||
lists_device[i]->growflag = 1;
|
||||
if (requests[i]->copy) lists_device[i]->growflag = 0;
|
||||
|
||||
lists_device[i]->stencilflag = 1;
|
||||
if (style == NSQ) lists_device[i]->stencilflag = 0;
|
||||
if (stencil_create[i] == NULL) lists_device[i]->stencilflag = 0;
|
||||
|
||||
lists_device[i]->ghostflag = 0;
|
||||
if (requests[i]->ghost) lists_device[i]->ghostflag = 1;
|
||||
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_list_flags2_kokkos(int i)
|
||||
{
|
||||
if (lists_host[i]) {
|
||||
if (lists_host[i]->buildflag) blist[nblist++] = i;
|
||||
if (lists_host[i]->growflag && requests[i]->occasional == 0)
|
||||
glist[nglist++] = i;
|
||||
if (lists_host[i]->stencilflag && requests[i]->occasional == 0)
|
||||
slist[nslist++] = i;
|
||||
}
|
||||
|
||||
if (lists_device[i]) {
|
||||
if (lists_device[i]->buildflag) blist[nblist++] = i;
|
||||
if (lists_device[i]->growflag && requests[i]->occasional == 0)
|
||||
glist[nglist++] = i;
|
||||
if (lists_device[i]->stencilflag && requests[i]->occasional == 0)
|
||||
slist[nslist++] = i;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_list_grow_kokkos(int i)
|
||||
{
|
||||
if (lists_host[i]!=NULL && lists_host[i]->growflag)
|
||||
lists_host[i]->grow(maxatom);
|
||||
if (lists_device[i]!=NULL && lists_device[i]->growflag)
|
||||
lists_device[i]->grow(maxatom);
|
||||
if (requests[i]->kokkos_device) {
|
||||
lists[i] = new NeighListKokkos<LMPDeviceType>(lmp);
|
||||
device_flag = 1;
|
||||
} else if (requests[i]->kokkos_host)
|
||||
lists[i] = new NeighListKokkos<LMPHostType>(lmp);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -281,49 +133,6 @@ void NeighborKokkos::init_ex_mol_bit_kokkos()
|
||||
k_ex_mol_bit.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::choose_build(int index, NeighRequest *rq)
|
||||
{
|
||||
if (rq->kokkos_host != 0) {
|
||||
PairPtrHost pb = NULL;
|
||||
if (rq->ghost) {
|
||||
if (rq->full) {
|
||||
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPHostType>;
|
||||
else pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0,1>;
|
||||
}
|
||||
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1,1>;
|
||||
} else {
|
||||
if (rq->full) {
|
||||
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPHostType>;
|
||||
else pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0,0>;
|
||||
}
|
||||
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1,0>;
|
||||
}
|
||||
pair_build_host[index] = pb;
|
||||
}
|
||||
if (rq->kokkos_device != 0) {
|
||||
PairPtrDevice pb = NULL;
|
||||
if (rq->ghost) {
|
||||
if (rq->full) {
|
||||
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
|
||||
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0,1>;
|
||||
}
|
||||
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1,1>;
|
||||
} else {
|
||||
if (rq->full) {
|
||||
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
|
||||
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0,0>;
|
||||
}
|
||||
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1,0>;
|
||||
}
|
||||
pair_build_device[index] = pb;
|
||||
return;
|
||||
}
|
||||
|
||||
Neighbor::choose_build(index,rq);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
if any atom moved trigger distance (half of neighbor skin) return 1
|
||||
shrink trigger distance if box size has changed
|
||||
@ -337,7 +146,7 @@ void NeighborKokkos::choose_build(int index, NeighRequest *rq)
|
||||
|
||||
int NeighborKokkos::check_distance()
|
||||
{
|
||||
if (nlist_device)
|
||||
if (device_flag)
|
||||
check_distance_kokkos<LMPDeviceType>();
|
||||
else
|
||||
check_distance_kokkos<LMPHostType>();
|
||||
@ -417,7 +226,7 @@ void NeighborKokkos::operator()(TagNeighborCheckDistance<DeviceType>, const int
|
||||
|
||||
void NeighborKokkos::build(int topoflag)
|
||||
{
|
||||
if (nlist_device)
|
||||
if (device_flag)
|
||||
build_kokkos<LMPDeviceType>(topoflag);
|
||||
else
|
||||
build_kokkos<LMPHostType>(topoflag);
|
||||
@ -428,18 +237,25 @@ void NeighborKokkos::build_kokkos(int topoflag)
|
||||
{
|
||||
typedef DeviceType device_type;
|
||||
|
||||
int i;
|
||||
int i,m;
|
||||
|
||||
ago = 0;
|
||||
ncalls++;
|
||||
lastcall = update->ntimestep;
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
|
||||
// check that using special bond flags will not overflow neigh lists
|
||||
|
||||
if (nall > NEIGHMASK)
|
||||
error->one(FLERR,"Too many local+ghost atoms for neighbor list");
|
||||
|
||||
// store current atom positions and box size if needed
|
||||
|
||||
if (dist_check) {
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
|
||||
x = atomKK->k_x;
|
||||
int nlocal = atom->nlocal;
|
||||
if (includegroup) nlocal = atom->nfirst;
|
||||
int maxhold_kokkos = xhold.view<DeviceType>().dimension_0();
|
||||
if (atom->nmax > maxhold || maxhold_kokkos < maxhold) {
|
||||
@ -471,54 +287,33 @@ void NeighborKokkos::build_kokkos(int topoflag)
|
||||
}
|
||||
}
|
||||
|
||||
// if any lists store neighbors of ghosts:
|
||||
// invoke grow() if nlocal+nghost exceeds previous list size
|
||||
// else only invoke grow() if nlocal exceeds previous list size
|
||||
// only for lists with growflag set and which are perpetual (glist)
|
||||
// bin atoms for all NBin instances
|
||||
// not just NBin associated with perpetual lists
|
||||
// b/c cannot wait to bin occasional lists in build_one() call
|
||||
// if bin then, atoms may have moved outside of proc domain & bin extent,
|
||||
// leading to errors or even a crash
|
||||
|
||||
if (anyghostlist && atom->nmax > maxatom) {
|
||||
maxatom = atom->nmax;
|
||||
for (i = 0; i < nglist; i++)
|
||||
if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
|
||||
else init_list_grow_kokkos(glist[i]);
|
||||
} else if (atom->nmax > maxatom) {
|
||||
maxatom = atom->nmax;
|
||||
for (i = 0; i < nglist; i++)
|
||||
if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
|
||||
else init_list_grow_kokkos(glist[i]);
|
||||
if (style != NSQ) {
|
||||
for (int i = 0; i < nbin; i++) {
|
||||
neigh_bin[i]->bin_atoms_setup(nall);
|
||||
neigh_bin[i]->bin_atoms();
|
||||
}
|
||||
}
|
||||
|
||||
// extend atom bin list if necessary
|
||||
// build pairwise lists for all perpetual NPair/NeighList
|
||||
// grow() with nlocal/nall args so that only realloc if have to
|
||||
|
||||
if (style != NSQ && atom->nmax > maxbin) {
|
||||
maxbin = atom->nmax;
|
||||
memory->destroy(bins);
|
||||
memory->create(bins,maxbin,"bins");
|
||||
}
|
||||
|
||||
// check that using special bond flags will not overflow neigh lists
|
||||
|
||||
if (atom->nlocal+atom->nghost > NEIGHMASK)
|
||||
error->one(FLERR,"Too many local+ghost atoms for neighbor list");
|
||||
|
||||
// invoke building of pair and molecular topology neighbor lists
|
||||
// only for pairwise lists with buildflag set
|
||||
// blist is for standard neigh lists, otherwise is a Kokkos list
|
||||
|
||||
for (i = 0; i < nblist; i++) {
|
||||
if (lists[blist[i]]) {
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
(this->*pair_build[blist[i]])(lists[blist[i]]);
|
||||
} else {
|
||||
if (lists_host[blist[i]])
|
||||
(this->*pair_build_host[blist[i]])(lists_host[blist[i]]);
|
||||
else if (lists_device[blist[i]])
|
||||
(this->*pair_build_device[blist[i]])(lists_device[blist[i]]);
|
||||
}
|
||||
for (i = 0; i < npair_perpetual; i++) {
|
||||
m = plist[i];
|
||||
lists[m]->grow(nlocal,nall);
|
||||
neigh_pair[m]->build_setup();
|
||||
neigh_pair[m]->build(lists[m]);
|
||||
}
|
||||
|
||||
if (atom->molecular && topoflag)
|
||||
build_topology_kokkos();
|
||||
// build topology lists for bonds/angles/etc
|
||||
|
||||
if (atom->molecular && topoflag) build_topology();
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
@ -532,26 +327,6 @@ void NeighborKokkos::operator()(TagNeighborXhold<DeviceType>, const int &i) cons
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::setup_bins_kokkos(int i)
|
||||
{
|
||||
if (lists_host[slist[i]]) {
|
||||
lists_host[slist[i]]->stencil_allocate(smax,style);
|
||||
(this->*stencil_create[slist[i]])(lists_host[slist[i]],sx,sy,sz);
|
||||
} else if (lists_device[slist[i]]) {
|
||||
lists_device[slist[i]]->stencil_allocate(smax,style);
|
||||
(this->*stencil_create[slist[i]])(lists_device[slist[i]],sx,sy,sz);
|
||||
}
|
||||
|
||||
//if (i < nslist-1) return; // this won't work if a non-kokkos neighbor list is last
|
||||
|
||||
if (maxhead > k_bins.d_view.dimension_0()) {
|
||||
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",maxhead,atoms_per_bin);
|
||||
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",maxhead);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::modify_ex_type_grow_kokkos(){
|
||||
memory->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type");
|
||||
k_ex1_type.modify<LMPHostType>();
|
||||
@ -575,8 +350,8 @@ void NeighborKokkos::modify_mol_group_grow_kokkos(){
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_topology_kokkos() {
|
||||
if (nlist_device) {
|
||||
void NeighborKokkos::init_topology() {
|
||||
if (device_flag) {
|
||||
neighbond_device.init_topology_kk();
|
||||
} else {
|
||||
neighbond_host.init_topology_kk();
|
||||
@ -588,8 +363,8 @@ void NeighborKokkos::init_topology_kokkos() {
|
||||
normally built with pair lists, but USER-CUDA separates them
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::build_topology_kokkos() {
|
||||
if (nlist_device) {
|
||||
void NeighborKokkos::build_topology() {
|
||||
if (device_flag) {
|
||||
neighbond_device.build_topology_kk();
|
||||
|
||||
k_bondlist = neighbond_device.k_bondlist;
|
||||
@ -637,7 +412,3 @@ void NeighborKokkos::build_topology_kokkos() {
|
||||
k_improperlist.modify<LMPHostType>();
|
||||
}
|
||||
}
|
||||
|
||||
// include to trigger instantiation of templated functions
|
||||
|
||||
#include "neigh_full_kokkos.h"
|
||||
|
||||
@ -22,316 +22,6 @@
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class Device>
|
||||
class NeighborKokkosExecute
|
||||
{
|
||||
typedef ArrayTypes<Device> AT;
|
||||
|
||||
public:
|
||||
NeighListKokkos<Device> neigh_list;
|
||||
const typename AT::t_xfloat_2d_randomread cutneighsq;
|
||||
const typename AT::t_int_1d bincount;
|
||||
const typename AT::t_int_1d_const c_bincount;
|
||||
typename AT::t_int_2d bins;
|
||||
typename AT::t_int_2d_const c_bins;
|
||||
const typename AT::t_x_array_randomread x;
|
||||
const typename AT::t_int_1d_const type,mask,molecule;
|
||||
|
||||
const typename AT::t_tagint_1d_const tag;
|
||||
const typename AT::t_tagint_2d_const special;
|
||||
const typename AT::t_int_2d_const nspecial;
|
||||
const int molecular;
|
||||
int moltemplate;
|
||||
|
||||
int special_flag[4];
|
||||
|
||||
const int nbinx,nbiny,nbinz;
|
||||
const int mbinx,mbiny,mbinz;
|
||||
const int mbinxlo,mbinylo,mbinzlo;
|
||||
const X_FLOAT bininvx,bininvy,bininvz;
|
||||
X_FLOAT bboxhi[3],bboxlo[3];
|
||||
|
||||
const int nlocal;
|
||||
|
||||
const int exclude;
|
||||
|
||||
const int nex_type;
|
||||
const int maxex_type;
|
||||
const typename AT::t_int_1d_const ex1_type,ex2_type;
|
||||
const typename AT::t_int_2d_const ex_type;
|
||||
|
||||
const int nex_group;
|
||||
const int maxex_group;
|
||||
const typename AT::t_int_1d_const ex1_group,ex2_group;
|
||||
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
|
||||
|
||||
const int nex_mol;
|
||||
const int maxex_mol;
|
||||
const typename AT::t_int_1d_const ex_mol_group;
|
||||
const typename AT::t_int_1d_const ex_mol_bit;
|
||||
|
||||
typename AT::t_int_scalar resize;
|
||||
typename AT::t_int_scalar new_maxneighs;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
|
||||
|
||||
const int xperiodic, yperiodic, zperiodic;
|
||||
const int xprd_half, yprd_half, zprd_half;
|
||||
|
||||
NeighborKokkosExecute(
|
||||
const NeighListKokkos<Device> &_neigh_list,
|
||||
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
|
||||
const typename AT::t_int_1d &_bincount,
|
||||
const typename AT::t_int_2d &_bins,
|
||||
const int _nlocal,
|
||||
const typename AT::t_x_array_randomread &_x,
|
||||
const typename AT::t_int_1d_const &_type,
|
||||
const typename AT::t_int_1d_const &_mask,
|
||||
const typename AT::t_int_1d_const &_molecule,
|
||||
const typename AT::t_tagint_1d_const &_tag,
|
||||
const typename AT::t_tagint_2d_const &_special,
|
||||
const typename AT::t_int_2d_const &_nspecial,
|
||||
const int &_molecular,
|
||||
const int & _nbinx,const int & _nbiny,const int & _nbinz,
|
||||
const int & _mbinx,const int & _mbiny,const int & _mbinz,
|
||||
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
|
||||
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
|
||||
const int & _exclude,const int & _nex_type,const int & _maxex_type,
|
||||
const typename AT::t_int_1d_const & _ex1_type,
|
||||
const typename AT::t_int_1d_const & _ex2_type,
|
||||
const typename AT::t_int_2d_const & _ex_type,
|
||||
const int & _nex_group,const int & _maxex_group,
|
||||
const typename AT::t_int_1d_const & _ex1_group,
|
||||
const typename AT::t_int_1d_const & _ex2_group,
|
||||
const typename AT::t_int_1d_const & _ex1_bit,
|
||||
const typename AT::t_int_1d_const & _ex2_bit,
|
||||
const int & _nex_mol,const int & _maxex_mol,
|
||||
const typename AT::t_int_1d_const & _ex_mol_group,
|
||||
const typename AT::t_int_1d_const & _ex_mol_bit,
|
||||
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
|
||||
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
|
||||
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
|
||||
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
|
||||
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
|
||||
nlocal(_nlocal),
|
||||
x(_x),type(_type),mask(_mask),molecule(_molecule),
|
||||
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
|
||||
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
|
||||
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
|
||||
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
|
||||
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
|
||||
exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type),
|
||||
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
|
||||
nex_group(_nex_group),maxex_group(_maxex_group),
|
||||
ex1_group(_ex1_group),ex2_group(_ex2_group),
|
||||
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol),
|
||||
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
|
||||
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
|
||||
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half){
|
||||
|
||||
if (molecular == 2) moltemplate = 1;
|
||||
else moltemplate = 0;
|
||||
|
||||
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
|
||||
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
|
||||
|
||||
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_resize = Kokkos::create_mirror_view(resize);
|
||||
#else
|
||||
h_resize = resize;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
new_maxneighs = typename AT::
|
||||
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
|
||||
#else
|
||||
h_new_maxneighs = new_maxneighs;
|
||||
#endif
|
||||
h_new_maxneighs() = neigh_list.maxneighs;
|
||||
};
|
||||
|
||||
~NeighborKokkosExecute() {neigh_list.clean_copy();};
|
||||
|
||||
template<int HalfNeigh, int GhostNewton>
|
||||
KOKKOS_FUNCTION
|
||||
void build_Item(const int &i) const;
|
||||
|
||||
template<int HalfNeigh>
|
||||
KOKKOS_FUNCTION
|
||||
void build_Item_Ghost(const int &i) const;
|
||||
|
||||
template<int ClusterSize>
|
||||
KOKKOS_FUNCTION
|
||||
void build_cluster_Item(const int &i) const;
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template<int HalfNeigh, int GhostNewton>
|
||||
__device__ inline
|
||||
void build_ItemCuda(typename Kokkos::TeamPolicy<Device>::member_type dev) const;
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void binatomsItem(const int &i) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi[0])
|
||||
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi[1])
|
||||
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi[2])
|
||||
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi[0])
|
||||
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi[1])
|
||||
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi[2])
|
||||
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
|
||||
|
||||
i[0] = ix - mbinxlo;
|
||||
i[1] = iy - mbinylo;
|
||||
i[2] = iz - mbinzlo;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int find_special(const int &i, const int &j) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int minimum_image_check(double dx, double dy, double dz) const {
|
||||
if (xperiodic && fabs(dx) > xprd_half) return 1;
|
||||
if (yperiodic && fabs(dy) > yprd_half) return 1;
|
||||
if (zperiodic && fabs(dz) > zprd_half) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<class Device>
|
||||
struct NeighborKokkosBinAtomsFunctor {
|
||||
typedef Device device_type;
|
||||
|
||||
const NeighborKokkosExecute<Device> c;
|
||||
|
||||
NeighborKokkosBinAtomsFunctor(const NeighborKokkosExecute<Device> &_c):
|
||||
c(_c) {};
|
||||
~NeighborKokkosBinAtomsFunctor() {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.binatomsItem(i);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Device,int HALF_NEIGH,int GHOST_NEWTON>
|
||||
struct NeighborKokkosBuildFunctor {
|
||||
typedef Device device_type;
|
||||
|
||||
const NeighborKokkosExecute<Device> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NeighborKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
|
||||
}
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (typename Kokkos::TeamPolicy<Device>::member_type dev) const {
|
||||
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
|
||||
}
|
||||
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
|
||||
#endif
|
||||
};
|
||||
|
||||
template<class Device,int HALF_NEIGH>
|
||||
struct NeighborKokkosBuildFunctorGhost {
|
||||
typedef Device device_type;
|
||||
|
||||
const NeighborKokkosExecute<Device> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NeighborKokkosBuildFunctorGhost(const NeighborKokkosExecute<Device> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_Item_Ghost<HALF_NEIGH>(i);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Device,int ClusterSize>
|
||||
struct NeighborClusterKokkosBuildFunctor {
|
||||
typedef Device device_type;
|
||||
|
||||
const NeighborKokkosExecute<Device> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NeighborClusterKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_cluster_Item<ClusterSize>(i);
|
||||
}
|
||||
};
|
||||
|
||||
// empty struct templated on the device type
// NOTE(review): presumably a dispatch tag for an operator() overload - confirm
template<class DeviceType>
struct TagNeighborCheckDistance {};
|
||||
|
||||
@ -342,24 +32,11 @@ class NeighborKokkos : public Neighbor {
|
||||
public:
|
||||
typedef int value_type;
|
||||
|
||||
|
||||
|
||||
int nlist_host; // pairwise neighbor lists on Host
|
||||
NeighListKokkos<LMPHostType> **lists_host;
|
||||
int nlist_device; // pairwise neighbor lists on Device
|
||||
NeighListKokkos<LMPDeviceType> **lists_device;
|
||||
|
||||
NeighBondKokkos<LMPHostType> neighbond_host;
|
||||
NeighBondKokkos<LMPDeviceType> neighbond_device;
|
||||
|
||||
DAT::tdual_int_2d k_bondlist;
|
||||
DAT::tdual_int_2d k_anglelist;
|
||||
DAT::tdual_int_2d k_dihedrallist;
|
||||
DAT::tdual_int_2d k_improperlist;
|
||||
|
||||
NeighborKokkos(class LAMMPS *);
|
||||
~NeighborKokkos();
|
||||
void init();
|
||||
void init_topology();
|
||||
void build_topology();
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -369,11 +46,7 @@ class NeighborKokkos : public Neighbor {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagNeighborXhold<DeviceType>, const int&) const;
|
||||
|
||||
private:
|
||||
int atoms_per_bin;
|
||||
DAT::tdual_xfloat_2d k_cutneighsq;
|
||||
DAT::tdual_int_1d k_bincount;
|
||||
DAT::tdual_int_2d k_bins;
|
||||
|
||||
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
|
||||
DAT::tdual_int_2d k_ex_type;
|
||||
@ -382,6 +55,16 @@ class NeighborKokkos : public Neighbor {
|
||||
DAT::tdual_int_1d k_ex_mol_group;
|
||||
DAT::tdual_int_1d k_ex_mol_bit;
|
||||
|
||||
NeighBondKokkos<LMPHostType> neighbond_host;
|
||||
NeighBondKokkos<LMPDeviceType> neighbond_device;
|
||||
|
||||
DAT::tdual_int_2d k_bondlist;
|
||||
DAT::tdual_int_2d k_anglelist;
|
||||
DAT::tdual_int_2d k_dihedrallist;
|
||||
DAT::tdual_int_2d k_improperlist;
|
||||
|
||||
private:
|
||||
|
||||
DAT::tdual_x_array x;
|
||||
DAT::tdual_x_array xhold;
|
||||
|
||||
@ -389,14 +72,10 @@ class NeighborKokkos : public Neighbor {
|
||||
int device_flag;
|
||||
|
||||
void init_cutneighsq_kokkos(int);
|
||||
int init_lists_kokkos();
|
||||
void init_list_flags1_kokkos(int);
|
||||
void init_list_flags2_kokkos(int);
|
||||
void init_list_grow_kokkos(int);
|
||||
void create_kokkos_list(int);
|
||||
void init_ex_type_kokkos(int);
|
||||
void init_ex_bit_kokkos();
|
||||
void init_ex_mol_bit_kokkos();
|
||||
void choose_build(int, NeighRequest *);
|
||||
virtual int check_distance();
|
||||
template<class DeviceType> int check_distance_kokkos();
|
||||
virtual void build(int);
|
||||
@ -405,27 +84,6 @@ class NeighborKokkos : public Neighbor {
|
||||
void modify_ex_type_grow_kokkos();
|
||||
void modify_ex_group_grow_kokkos();
|
||||
void modify_mol_group_grow_kokkos();
|
||||
void init_topology_kokkos();
|
||||
void build_topology_kokkos();
|
||||
|
||||
typedef void (NeighborKokkos::*PairPtrHost)
|
||||
(class NeighListKokkos<LMPHostType> *);
|
||||
PairPtrHost *pair_build_host;
|
||||
typedef void (NeighborKokkos::*PairPtrDevice)
|
||||
(class NeighListKokkos<LMPDeviceType> *);
|
||||
PairPtrDevice *pair_build_device;
|
||||
|
||||
template<class DeviceType,int HALF_NEIGH, int GHOST>
|
||||
void full_bin_kokkos(NeighListKokkos<DeviceType> *list);
|
||||
template<class DeviceType>
|
||||
void full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list);
|
||||
|
||||
typedef void (NeighborKokkos::*StencilPtrHost)
|
||||
(class NeighListKokkos<LMPHostType> *, int, int, int);
|
||||
StencilPtrHost *stencil_create_host;
|
||||
typedef void (NeighborKokkos::*StencilPtrDevice)
|
||||
(class NeighListKokkos<LMPDeviceType> *, int, int, int);
|
||||
StencilPtrDevice *stencil_create_device;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
62
src/KOKKOS/npair_copy_kokkos.cpp
Normal file
62
src/KOKKOS/npair_copy_kokkos.cpp
Normal file
@ -0,0 +1,62 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_copy_kokkos.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "domain.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
NPairCopyKokkos<DeviceType>::NPairCopyKokkos(LAMMPS *lmp) : NPair(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create list which is simply a copy of parent list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NPairCopyKokkos<DeviceType>::build(NeighList *list)
|
||||
{
|
||||
NeighList *listcopy = list->listcopy;
|
||||
|
||||
list->inum = listcopy->inum;
|
||||
list->gnum = listcopy->gnum;
|
||||
list->ilist = listcopy->ilist;
|
||||
list->numneigh = listcopy->numneigh;
|
||||
list->firstneigh = listcopy->firstneigh;
|
||||
list->firstdouble = listcopy->firstdouble;
|
||||
list->ipage = listcopy->ipage;
|
||||
list->dpage = listcopy->dpage;
|
||||
|
||||
NeighListKokkos<DeviceType>* list_kk = (NeighListKokkos<DeviceType>*) list;
|
||||
NeighListKokkos<DeviceType>* listcopy_kk = (NeighListKokkos<DeviceType>*) list->listcopy;
|
||||
|
||||
list_kk->d_ilist = listcopy_kk->d_ilist;
|
||||
list_kk->d_numneigh = listcopy_kk->d_numneigh;
|
||||
list_kk->d_neighbors = listcopy_kk->d_neighbors;
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class NPairCopyKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class NPairCopyKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
48
src/KOKKOS/npair_copy_kokkos.h
Normal file
48
src/KOKKOS/npair_copy_kokkos.h
Normal file
@ -0,0 +1,48 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
NPairStyle(copy/kk/device,
|
||||
NPairCopyKokkos<LMPDeviceType>,
|
||||
NP_COPY | NP_KOKKOS_DEVICE)
|
||||
|
||||
NPairStyle(copy/kk/host,
|
||||
NPairCopyKokkos<LMPHostType>,
|
||||
NP_COPY | NP_KOKKOS_HOST)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_COPY_KOKKOS_H
|
||||
#define LMP_NPAIR_COPY_KOKKOS_H
|
||||
|
||||
#include "npair.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class NPairCopyKokkos : public NPair {
|
||||
public:
|
||||
NPairCopyKokkos(class LAMMPS *);
|
||||
~NPairCopyKokkos() {}
|
||||
void build(class NeighList *);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
@ -11,17 +11,105 @@
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "domain_kokkos.h"
|
||||
#include "neighbor_kokkos.h"
|
||||
#include "nbin_kokkos.h"
|
||||
#include "nstencil.h"
|
||||
#include "force.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST>
|
||||
void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) {
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy needed info from Neighbor class to this build class
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST>
|
||||
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_neighbor_info()
|
||||
{
|
||||
NPair::copy_neighbor_info();
|
||||
|
||||
NeighborKokkos* neighborKK = (NeighborKokkos*) neighbor;
|
||||
|
||||
// general params
|
||||
|
||||
newton_pair = force->newton_pair;
|
||||
k_cutneighsq = neighborKK->k_cutneighsq;
|
||||
|
||||
// exclusion info
|
||||
|
||||
k_ex1_type = neighborKK->k_ex1_type;
|
||||
k_ex2_type = neighborKK->k_ex2_type;
|
||||
k_ex_type = neighborKK->k_ex_type;
|
||||
k_ex1_group = neighborKK->k_ex1_group;
|
||||
k_ex2_group = neighborKK->k_ex2_group;
|
||||
k_ex1_bit = neighborKK->k_ex1_bit;
|
||||
k_ex2_bit = neighborKK->k_ex2_bit;
|
||||
k_ex_mol_group = neighborKK->k_ex_mol_group;
|
||||
k_ex_mol_bit = neighborKK->k_ex_mol_bit;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy per-atom and per-bin vectors from NBin class to this build class
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST>
|
||||
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_bin_info()
|
||||
{
|
||||
NPair::copy_bin_info();
|
||||
|
||||
NBinKokkos<DeviceType>* nbKK = (NBinKokkos<DeviceType>*) nb;
|
||||
|
||||
atoms_per_bin = nbKK->atoms_per_bin;
|
||||
k_bincount = nbKK->k_bincount;
|
||||
k_bins = nbKK->k_bins;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy needed info from NStencil class to this build class
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST>
|
||||
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_stencil_info()
|
||||
{
|
||||
NPair::copy_stencil_info();
|
||||
|
||||
nstencil = ns->nstencil;
|
||||
|
||||
int maxstencil = ns->get_maxstencil();
|
||||
|
||||
k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
|
||||
for (int k = 0; k < maxstencil; k++)
|
||||
k_stencil.h_view(k) = ns->stencil[k];
|
||||
k_stencil.modify<LMPHostType>();
|
||||
k_stencil.sync<DeviceType>();
|
||||
if (GHOST) {
|
||||
k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
|
||||
for (int k = 0; k < maxstencil; k++) {
|
||||
k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
|
||||
k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];
|
||||
k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2];
|
||||
}
|
||||
k_stencilxyz.modify<LMPHostType>();
|
||||
k_stencilxyz.sync<DeviceType>();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST>
|
||||
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::build(NeighList *list_)
|
||||
{
|
||||
NeighListKokkos<DeviceType>* list = (NeighListKokkos<DeviceType>*) list_;
|
||||
const int nlocal = includegroup?atom->nfirst:atom->nlocal;
|
||||
int nall = nlocal;
|
||||
if (GHOST)
|
||||
@ -32,7 +120,11 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
data(*list,
|
||||
k_cutneighsq.view<DeviceType>(),
|
||||
k_bincount.view<DeviceType>(),
|
||||
k_bins.view<DeviceType>(),nlocal,
|
||||
k_bins.view<DeviceType>(),
|
||||
nstencil,
|
||||
k_stencil.view<DeviceType>(),
|
||||
k_stencilxyz.view<DeviceType>(),
|
||||
nlocal,
|
||||
atomKK->k_x.view<DeviceType>(),
|
||||
atomKK->k_type.view<DeviceType>(),
|
||||
atomKK->k_mask.view<DeviceType>(),
|
||||
@ -43,16 +135,16 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
atomKK->molecular,
|
||||
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
|
||||
bininvx,bininvy,bininvz,
|
||||
exclude, nex_type,maxex_type,
|
||||
exclude, nex_type,
|
||||
k_ex1_type.view<DeviceType>(),
|
||||
k_ex2_type.view<DeviceType>(),
|
||||
k_ex_type.view<DeviceType>(),
|
||||
nex_group,maxex_group,
|
||||
nex_group,
|
||||
k_ex1_group.view<DeviceType>(),
|
||||
k_ex2_group.view<DeviceType>(),
|
||||
k_ex1_bit.view<DeviceType>(),
|
||||
k_ex2_bit.view<DeviceType>(),
|
||||
nex_mol, maxex_mol,
|
||||
nex_mol,
|
||||
k_ex_mol_group.view<DeviceType>(),
|
||||
k_ex_mol_bit.view<DeviceType>(),
|
||||
bboxhi,bboxlo,
|
||||
@ -69,40 +161,15 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
k_ex2_bit.sync<DeviceType>();
|
||||
k_ex_mol_group.sync<DeviceType>();
|
||||
k_ex_mol_bit.sync<DeviceType>();
|
||||
k_bincount.sync<DeviceType>(),
|
||||
k_bins.sync<DeviceType>(),
|
||||
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
|
||||
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
|
||||
if (GHOST)
|
||||
Kokkos::deep_copy(list->d_stencilxyz,list->h_stencilxyz);
|
||||
|
||||
data.special_flag[0] = special_flag[0];
|
||||
data.special_flag[1] = special_flag[1];
|
||||
data.special_flag[2] = special_flag[2];
|
||||
data.special_flag[3] = special_flag[3];
|
||||
|
||||
while(data.h_resize() > 0) {
|
||||
data.h_resize() = 0;
|
||||
deep_copy(data.resize, data.h_resize);
|
||||
|
||||
MemsetZeroFunctor<DeviceType> f_zero;
|
||||
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
|
||||
Kokkos::parallel_for(mbins, f_zero);
|
||||
DeviceType::fence();
|
||||
|
||||
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
|
||||
|
||||
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
|
||||
DeviceType::fence();
|
||||
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
if(data.h_resize()) {
|
||||
|
||||
atoms_per_bin += 16;
|
||||
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
|
||||
data.bins = k_bins.view<DeviceType>();
|
||||
data.c_bins = data.bins;
|
||||
}
|
||||
}
|
||||
|
||||
if(list->d_neighbors.dimension_0()<nall) {
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
|
||||
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
|
||||
@ -125,18 +192,18 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
#endif
|
||||
|
||||
if (GHOST) {
|
||||
NeighborKokkosBuildFunctorGhost<DeviceType,HALF_NEIGH> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
NPairKokkosBuildFunctorGhost<DeviceType,HALF_NEIGH> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
Kokkos::parallel_for(nall, f);
|
||||
} else {
|
||||
if (newton_pair) {
|
||||
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
Kokkos::parallel_for(config, f);
|
||||
#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
#endif
|
||||
} else {
|
||||
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
Kokkos::parallel_for(config, f);
|
||||
#else
|
||||
@ -169,24 +236,9 @@ if (GHOST) {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device>
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void NeighborKokkosExecute<Device>::binatomsItem(const int &i) const
|
||||
{
|
||||
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
|
||||
|
||||
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
|
||||
if(ac < bins.dimension_1()) {
|
||||
bins(ibin, ac) = i;
|
||||
} else {
|
||||
resize() = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
template<class Device>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) const
|
||||
int NeighborKokkosExecute<DeviceType>::find_special(const int &i, const int &j) const
|
||||
{
|
||||
const int n1 = nspecial(i,0);
|
||||
const int n2 = nspecial(i,1);
|
||||
@ -214,9 +266,9 @@ int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) cons
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device>
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
|
||||
int NeighborKokkosExecute<DeviceType>::exclusion(const int &i,const int &j,
|
||||
const int &itype,const int &jtype) const
|
||||
{
|
||||
int m;
|
||||
@ -241,8 +293,8 @@ int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device> template<int HalfNeigh,int GhostNewton>
|
||||
void NeighborKokkosExecute<Device>::
|
||||
template<class DeviceType> template<int HalfNeigh,int Newton>
|
||||
void NeighborKokkosExecute<DeviceType>::
|
||||
build_Item(const int &i) const
|
||||
{
|
||||
/* if necessary, goto next page and add pages */
|
||||
@ -261,9 +313,8 @@ void NeighborKokkosExecute<Device>::
|
||||
|
||||
const int ibin = coord2bin(xtmp, ytmp, ztmp);
|
||||
|
||||
const int nstencil = neigh_list.nstencil;
|
||||
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
|
||||
= neigh_list.d_stencil;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
|
||||
= d_stencil;
|
||||
|
||||
// loop over all bins in neighborhood (includes ibin)
|
||||
if(HalfNeigh)
|
||||
@ -272,8 +323,8 @@ void NeighborKokkosExecute<Device>::
|
||||
const int jtype = type(j);
|
||||
|
||||
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
|
||||
if((j == i) || (HalfNeigh && !GhostNewton && (j < i)) ||
|
||||
(HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) &&
|
||||
if((j == i) || (HalfNeigh && !Newton && (j < i)) ||
|
||||
(HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
|
||||
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
|
||||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
|
||||
) continue;
|
||||
@ -312,14 +363,16 @@ void NeighborKokkosExecute<Device>::
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
|
||||
// get subview of jbin
|
||||
if(HalfNeigh&&(ibin==jbin)) continue;
|
||||
//const ArrayTypes<Device>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
|
||||
//const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
|
||||
for(int m = 0; m < c_bincount(jbin); m++) {
|
||||
|
||||
const int j = c_bins(jbin,m);
|
||||
const int jtype = type(j);
|
||||
|
||||
if(HalfNeigh && !GhostNewton && (j < i)) continue;
|
||||
if(HalfNeigh && !Newton && (j < i)) continue;
|
||||
if(!HalfNeigh && j==i) continue;
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
@ -331,7 +384,7 @@ void NeighborKokkosExecute<Device>::
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
@ -364,15 +417,18 @@ void NeighborKokkosExecute<Device>::
|
||||
|
||||
if(n >= new_maxneighs()) new_maxneighs() = n;
|
||||
}
|
||||
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
extern __shared__ X_FLOAT sharedmem[];
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType> template<int HalfNeigh,int GhostNewton>
|
||||
template<class DeviceType> template<int HalfNeigh,int Newton>
|
||||
__device__ inline
|
||||
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
|
||||
{
|
||||
@ -429,8 +485,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
|
||||
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
|
||||
if((j == i) ||
|
||||
(HalfNeigh && !GhostNewton && (j < i)) ||
|
||||
(HalfNeigh && GhostNewton &&
|
||||
(HalfNeigh && !Newton && (j < i)) ||
|
||||
(HalfNeigh && Newton &&
|
||||
((j < i) ||
|
||||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
|
||||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
|
||||
@ -445,7 +501,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
if (molecular) {
|
||||
int which = 0;
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
@ -472,9 +528,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
const int nstencil = neigh_list.nstencil;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
|
||||
= neigh_list.d_stencil;
|
||||
= d_stencil;
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
|
||||
@ -501,7 +556,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
const int jtype = other_x[m + 3 * atoms_per_bin];
|
||||
|
||||
//if(HalfNeigh && (j < i)) continue;
|
||||
if(HalfNeigh && !GhostNewton && (j < i)) continue;
|
||||
if(HalfNeigh && !Newton && (j < i)) continue;
|
||||
if(!HalfNeigh && j==i) continue;
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
@ -514,7 +569,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
if (molecular) {
|
||||
int which = 0;
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
@ -558,8 +613,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device> template<int HalfNeigh>
|
||||
void NeighborKokkosExecute<Device>::
|
||||
template<class DeviceType> template<int HalfNeigh>
|
||||
void NeighborKokkosExecute<DeviceType>::
|
||||
build_Item_Ghost(const int &i) const
|
||||
{
|
||||
/* if necessary, goto next page and add pages */
|
||||
@ -576,11 +631,10 @@ void NeighborKokkosExecute<Device>::
|
||||
const X_FLOAT ztmp = x(i, 2);
|
||||
const int itype = type(i);
|
||||
|
||||
const int nstencil = neigh_list.nstencil;
|
||||
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
|
||||
= neigh_list.d_stencil;
|
||||
const typename ArrayTypes<Device>::t_int_1d_3_const_um stencilxyz
|
||||
= neigh_list.d_stencilxyz;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
|
||||
= d_stencil;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_3_const_um stencilxyz
|
||||
= d_stencilxyz;
|
||||
|
||||
// loop over all atoms in surrounding bins in stencil including self
|
||||
// when i is a ghost atom, must check if stencil bin is out of bounds
|
||||
@ -679,197 +733,17 @@ void NeighborKokkosExecute<Device>::
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
{
|
||||
const int nall = includegroup?atom->nfirst:atom->nlocal;
|
||||
list->grow(nall);
|
||||
|
||||
NeighborKokkosExecute<DeviceType>
|
||||
data(*list,
|
||||
k_cutneighsq.view<DeviceType>(),
|
||||
k_bincount.view<DeviceType>(),
|
||||
k_bins.view<DeviceType>(),nall,
|
||||
atomKK->k_x.view<DeviceType>(),
|
||||
atomKK->k_type.view<DeviceType>(),
|
||||
atomKK->k_mask.view<DeviceType>(),
|
||||
atomKK->k_molecule.view<DeviceType>(),
|
||||
atomKK->k_tag.view<DeviceType>(),
|
||||
atomKK->k_special.view<DeviceType>(),
|
||||
atomKK->k_nspecial.view<DeviceType>(),
|
||||
atomKK->molecular,
|
||||
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
|
||||
bininvx,bininvy,bininvz,
|
||||
exclude, nex_type,maxex_type,
|
||||
k_ex1_type.view<DeviceType>(),
|
||||
k_ex2_type.view<DeviceType>(),
|
||||
k_ex_type.view<DeviceType>(),
|
||||
nex_group,maxex_group,
|
||||
k_ex1_group.view<DeviceType>(),
|
||||
k_ex2_group.view<DeviceType>(),
|
||||
k_ex1_bit.view<DeviceType>(),
|
||||
k_ex2_bit.view<DeviceType>(),
|
||||
nex_mol, maxex_mol,
|
||||
k_ex_mol_group.view<DeviceType>(),
|
||||
k_ex_mol_bit.view<DeviceType>(),
|
||||
bboxhi,bboxlo,
|
||||
domain->xperiodic,domain->yperiodic,domain->zperiodic,
|
||||
domain->xprd_half,domain->yprd_half,domain->zprd_half);
|
||||
|
||||
k_cutneighsq.sync<DeviceType>();
|
||||
k_ex1_type.sync<DeviceType>();
|
||||
k_ex2_type.sync<DeviceType>();
|
||||
k_ex_type.sync<DeviceType>();
|
||||
k_ex1_group.sync<DeviceType>();
|
||||
k_ex2_group.sync<DeviceType>();
|
||||
k_ex1_bit.sync<DeviceType>();
|
||||
k_ex2_bit.sync<DeviceType>();
|
||||
k_ex_mol_group.sync<DeviceType>();
|
||||
k_ex_mol_bit.sync<DeviceType>();
|
||||
|
||||
data.special_flag[0] = special_flag[0];
|
||||
data.special_flag[1] = special_flag[1];
|
||||
data.special_flag[2] = special_flag[2];
|
||||
data.special_flag[3] = special_flag[3];
|
||||
|
||||
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
|
||||
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
|
||||
DeviceType::fence();
|
||||
|
||||
while(data.h_resize() > 0) {
|
||||
data.h_resize() = 0;
|
||||
deep_copy(data.resize, data.h_resize);
|
||||
|
||||
MemsetZeroFunctor<DeviceType> f_zero;
|
||||
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
|
||||
Kokkos::parallel_for(mbins, f_zero);
|
||||
DeviceType::fence();
|
||||
|
||||
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
|
||||
|
||||
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
|
||||
DeviceType::fence();
|
||||
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
if(data.h_resize()) {
|
||||
|
||||
atoms_per_bin += 16;
|
||||
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
|
||||
data.bins = k_bins.view<DeviceType>();
|
||||
data.c_bins = data.bins;
|
||||
}
|
||||
}
|
||||
|
||||
if(list->d_neighbors.dimension_0()<nall) {
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
|
||||
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
data.neigh_list.d_numneigh = list->d_numneigh;
|
||||
}
|
||||
data.h_resize()=1;
|
||||
while(data.h_resize()) {
|
||||
data.h_new_maxneighs() = list->maxneighs;
|
||||
data.h_resize() = 0;
|
||||
|
||||
Kokkos::deep_copy(data.resize, data.h_resize);
|
||||
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
|
||||
namespace LAMMPS_NS {
|
||||
template class NPairKokkos<LMPDeviceType,0,0>;
|
||||
template class NPairKokkos<LMPDeviceType,0,1>;
|
||||
template class NPairKokkos<LMPDeviceType,1,0>;
|
||||
template class NPairKokkos<LMPDeviceType,1,1>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#define BINS_PER_BLOCK 2
|
||||
const int factor = atoms_per_bin<64?2:1;
|
||||
Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor);
|
||||
#else
|
||||
const int factor = 1;
|
||||
template class NPairKokkos<LMPHostType,0,0>;
|
||||
template class NPairKokkos<LMPHostType,0,1>;
|
||||
template class NPairKokkos<LMPHostType,1,0>;
|
||||
template class NPairKokkos<LMPHostType,1,1>;
|
||||
#endif
|
||||
|
||||
if(newton_pair) {
|
||||
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
//#ifdef KOKKOS_HAVE_CUDA
|
||||
// Kokkos::parallel_for(config, f);
|
||||
//#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
//#endif
|
||||
} else {
|
||||
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
//#ifdef KOKKOS_HAVE_CUDA
|
||||
// Kokkos::parallel_for(config, f);
|
||||
//#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
//#endif
|
||||
}
|
||||
DeviceType::fence();
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
|
||||
if(data.h_resize()) {
|
||||
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
|
||||
list->maxneighs = data.h_new_maxneighs() * 1.2;
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
data.neigh_list.maxneighs = list->maxneighs;
|
||||
}
|
||||
}
|
||||
|
||||
list->inum = nall;
|
||||
list->gnum = 0;
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device> template<int ClusterSize>
|
||||
void NeighborKokkosExecute<Device>::
|
||||
build_cluster_Item(const int &i) const
|
||||
{
|
||||
/* if necessary, goto next page and add pages */
|
||||
int n = 0;
|
||||
|
||||
// get subview of neighbors of i
|
||||
|
||||
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
|
||||
const X_FLOAT xtmp = x(i, 0);
|
||||
const X_FLOAT ytmp = x(i, 1);
|
||||
const X_FLOAT ztmp = x(i, 2);
|
||||
const int itype = type(i);
|
||||
|
||||
const int ibin = coord2bin(xtmp, ytmp, ztmp);
|
||||
|
||||
const int nstencil = neigh_list.nstencil;
|
||||
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
|
||||
= neigh_list.d_stencil;
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
for(int m = 0; m < c_bincount(jbin); m++) {
|
||||
const int j = c_bins(jbin,m);
|
||||
bool skip = i == j;
|
||||
for(int k = 0; k< (n<neigh_list.maxneighs?n:neigh_list.maxneighs); k++)
|
||||
if((j-(j%ClusterSize)) == neighbors_i(k)) {skip=true;};//{m += ClusterSize - j&(ClusterSize-1)-1; skip=true;}
|
||||
|
||||
if(!skip) {
|
||||
const int jtype = type(j);
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n) = (j-(j%ClusterSize));
|
||||
n++;
|
||||
//m += ClusterSize - j&(ClusterSize-1)-1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
neigh_list.d_numneigh(i) = n;
|
||||
|
||||
if(n >= neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n >= new_maxneighs()) new_maxneighs() = n;
|
||||
}
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
||||
}
|
||||
424
src/KOKKOS/npair_kokkos.h
Normal file
424
src/KOKKOS/npair_kokkos.h
Normal file
@ -0,0 +1,424 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
// Registration of the Kokkos neighbor-pair build styles.
// Each entry names one instantiation of
// NPairKokkos<DeviceType,HALF_NEIGH,GHOST> and passes it to NPairStyle()
// together with the style keyword and the bitmask of NP_* flags the
// style is registered with.

// full list, host
typedef NPairKokkos<LMPHostType,0,0> NPairKokkosFullBinHost;
|
||||
NPairStyle(full/bin/kk/host,
|
||||
NPairKokkosFullBinHost,
|
||||
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
|
||||
|
||||
// full list, device
typedef NPairKokkos<LMPDeviceType,0,0> NPairKokkosFullBinDevice;
|
||||
NPairStyle(full/bin/kk/device,
|
||||
NPairKokkosFullBinDevice,
|
||||
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
|
||||
|
||||
// full list with NP_GHOST, host
typedef NPairKokkos<LMPHostType,0,1> NPairKokkosFullBinGhostHost;
|
||||
NPairStyle(full/bin/ghost/kk/host,
|
||||
NPairKokkosFullBinGhostHost,
|
||||
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
|
||||
|
||||
// full list with NP_GHOST, device
typedef NPairKokkos<LMPDeviceType,0,1> NPairKokkosFullBinGhostDevice;
|
||||
NPairStyle(full/bin/ghost/kk/device,
|
||||
NPairKokkosFullBinGhostDevice,
|
||||
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
|
||||
|
||||
// half list, host
typedef NPairKokkos<LMPHostType,1,0> NPairKokkosHalfBinHost;
|
||||
NPairStyle(half/bin/kk/host,
|
||||
NPairKokkosHalfBinHost,
|
||||
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
|
||||
|
||||
// half list, device
typedef NPairKokkos<LMPDeviceType,1,0> NPairKokkosHalfBinDevice;
|
||||
NPairStyle(half/bin/kk/device,
|
||||
NPairKokkosHalfBinDevice,
|
||||
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
|
||||
|
||||
// half list with NP_GHOST, host
typedef NPairKokkos<LMPHostType,1,1> NPairKokkosHalfBinGhostHost;
|
||||
NPairStyle(half/bin/ghost/kk/host,
|
||||
NPairKokkosHalfBinGhostHost,
|
||||
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
|
||||
|
||||
// half list with NP_GHOST, device
typedef NPairKokkos<LMPDeviceType,1,1> NPairKokkosHalfBinGhostDevice;
|
||||
NPairStyle(half/bin/ghost/kk/device,
|
||||
NPairKokkosHalfBinGhostDevice,
|
||||
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_KOKKOS_H
|
||||
#define LMP_NPAIR_KOKKOS_H
|
||||
|
||||
#include "npair.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST>
|
||||
class NPairKokkos : public NPair {
|
||||
public:
|
||||
NPairKokkos(class LAMMPS *);
|
||||
~NPairKokkos() {}
|
||||
void copy_neighbor_info();
|
||||
void copy_bin_info();
|
||||
void copy_stencil_info();
|
||||
void build(class NeighList *);
|
||||
|
||||
private:
|
||||
int newton_pair;
|
||||
|
||||
// data from Neighbor class
|
||||
|
||||
DAT::tdual_xfloat_2d k_cutneighsq;
|
||||
|
||||
// exclusion data from Neighbor class
|
||||
|
||||
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
|
||||
DAT::tdual_int_2d k_ex_type;
|
||||
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
|
||||
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
|
||||
DAT::tdual_int_1d k_ex_mol_group;
|
||||
DAT::tdual_int_1d k_ex_mol_bit;
|
||||
|
||||
// data from NBin class
|
||||
|
||||
int atoms_per_bin;
|
||||
DAT::tdual_int_1d k_bincount;
|
||||
DAT::tdual_int_2d k_bins;
|
||||
|
||||
// data from NStencil class
|
||||
|
||||
int nstencil;
|
||||
DAT::tdual_int_1d k_stencil; // # of J neighs for each I
|
||||
DAT::tdual_int_1d_3 k_stencilxyz;
|
||||
};
|
||||
|
||||
template<class DeviceType>
|
||||
class NeighborKokkosExecute
|
||||
{
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
public:
|
||||
NeighListKokkos<DeviceType> neigh_list;
|
||||
|
||||
// data from Neighbor class
|
||||
|
||||
const typename AT::t_xfloat_2d_randomread cutneighsq;
|
||||
|
||||
// exclusion data from Neighbor class
|
||||
|
||||
const int exclude;
|
||||
|
||||
const int nex_type;
|
||||
const typename AT::t_int_1d_const ex1_type,ex2_type;
|
||||
const typename AT::t_int_2d_const ex_type;
|
||||
|
||||
const int nex_group;
|
||||
const typename AT::t_int_1d_const ex1_group,ex2_group;
|
||||
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
|
||||
|
||||
const int nex_mol;
|
||||
const typename AT::t_int_1d_const ex_mol_group;
|
||||
const typename AT::t_int_1d_const ex_mol_bit;
|
||||
|
||||
// data from NBin class
|
||||
|
||||
const typename AT::t_int_1d bincount;
|
||||
const typename AT::t_int_1d_const c_bincount;
|
||||
typename AT::t_int_2d bins;
|
||||
typename AT::t_int_2d_const c_bins;
|
||||
|
||||
|
||||
// data from NStencil class
|
||||
|
||||
int nstencil;
|
||||
typename AT::t_int_1d d_stencil; // # of J neighs for each I
|
||||
typename AT::t_int_1d_3 d_stencilxyz;
|
||||
|
||||
// data from Atom class
|
||||
|
||||
const typename AT::t_x_array_randomread x;
|
||||
const typename AT::t_int_1d_const type,mask,molecule;
|
||||
const typename AT::t_tagint_1d_const tag;
|
||||
const typename AT::t_tagint_2d_const special;
|
||||
const typename AT::t_int_2d_const nspecial;
|
||||
const int molecular;
|
||||
int moltemplate;
|
||||
|
||||
int special_flag[4];
|
||||
|
||||
const int nbinx,nbiny,nbinz;
|
||||
const int mbinx,mbiny,mbinz;
|
||||
const int mbinxlo,mbinylo,mbinzlo;
|
||||
const X_FLOAT bininvx,bininvy,bininvz;
|
||||
X_FLOAT bboxhi[3],bboxlo[3];
|
||||
|
||||
const int nlocal;
|
||||
|
||||
typename AT::t_int_scalar resize;
|
||||
typename AT::t_int_scalar new_maxneighs;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
|
||||
|
||||
const int xperiodic, yperiodic, zperiodic;
|
||||
const int xprd_half, yprd_half, zprd_half;
|
||||
|
||||
NeighborKokkosExecute(
|
||||
const NeighListKokkos<DeviceType> &_neigh_list,
|
||||
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
|
||||
const typename AT::t_int_1d &_bincount,
|
||||
const typename AT::t_int_2d &_bins,
|
||||
const int _nstencil,
|
||||
const typename AT::t_int_1d &_d_stencil,
|
||||
const typename AT::t_int_1d_3 &_d_stencilxyz,
|
||||
const int _nlocal,
|
||||
const typename AT::t_x_array_randomread &_x,
|
||||
const typename AT::t_int_1d_const &_type,
|
||||
const typename AT::t_int_1d_const &_mask,
|
||||
const typename AT::t_int_1d_const &_molecule,
|
||||
const typename AT::t_tagint_1d_const &_tag,
|
||||
const typename AT::t_tagint_2d_const &_special,
|
||||
const typename AT::t_int_2d_const &_nspecial,
|
||||
const int &_molecular,
|
||||
const int & _nbinx,const int & _nbiny,const int & _nbinz,
|
||||
const int & _mbinx,const int & _mbiny,const int & _mbinz,
|
||||
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
|
||||
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
|
||||
const int & _exclude,const int & _nex_type,
|
||||
const typename AT::t_int_1d_const & _ex1_type,
|
||||
const typename AT::t_int_1d_const & _ex2_type,
|
||||
const typename AT::t_int_2d_const & _ex_type,
|
||||
const int & _nex_group,
|
||||
const typename AT::t_int_1d_const & _ex1_group,
|
||||
const typename AT::t_int_1d_const & _ex2_group,
|
||||
const typename AT::t_int_1d_const & _ex1_bit,
|
||||
const typename AT::t_int_1d_const & _ex2_bit,
|
||||
const int & _nex_mol,
|
||||
const typename AT::t_int_1d_const & _ex_mol_group,
|
||||
const typename AT::t_int_1d_const & _ex_mol_bit,
|
||||
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
|
||||
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
|
||||
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
|
||||
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
|
||||
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
|
||||
nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),
|
||||
nlocal(_nlocal),
|
||||
x(_x),type(_type),mask(_mask),molecule(_molecule),
|
||||
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
|
||||
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
|
||||
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
|
||||
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
|
||||
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
|
||||
exclude(_exclude),nex_type(_nex_type),
|
||||
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
|
||||
nex_group(_nex_group),
|
||||
ex1_group(_ex1_group),ex2_group(_ex2_group),
|
||||
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),
|
||||
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
|
||||
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
|
||||
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) {
|
||||
|
||||
if (molecular == 2) moltemplate = 1;
|
||||
else moltemplate = 0;
|
||||
|
||||
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
|
||||
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
|
||||
|
||||
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_resize = Kokkos::create_mirror_view(resize);
|
||||
#else
|
||||
h_resize = resize;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
new_maxneighs = typename AT::
|
||||
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
|
||||
#else
|
||||
h_new_maxneighs = new_maxneighs;
|
||||
#endif
|
||||
h_new_maxneighs() = neigh_list.maxneighs;
|
||||
};
|
||||
|
||||
~NeighborKokkosExecute() {neigh_list.clean_copy();};
|
||||
|
||||
template<int HalfNeigh, int Newton>
|
||||
KOKKOS_FUNCTION
|
||||
void build_Item(const int &i) const;
|
||||
|
||||
template<int HalfNeigh>
|
||||
KOKKOS_FUNCTION
|
||||
void build_Item_Ghost(const int &i) const;
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template<int HalfNeigh, int Newton>
|
||||
__device__ inline
|
||||
void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void binatomsItem(const int &i) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi[0])
|
||||
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi[1])
|
||||
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi[2])
|
||||
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi[0])
|
||||
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi[1])
|
||||
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi[2])
|
||||
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
|
||||
|
||||
i[0] = ix - mbinxlo;
|
||||
i[1] = iy - mbinylo;
|
||||
i[2] = iz - mbinzlo;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int find_special(const int &i, const int &j) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int minimum_image_check(double dx, double dy, double dz) const {
|
||||
if (xperiodic && fabs(dx) > xprd_half) return 1;
|
||||
if (yperiodic && fabs(dy) > yprd_half) return 1;
|
||||
if (zperiodic && fabs(dz) > zprd_half) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<class DeviceType,int HALF_NEIGH,int GHOST_NEWTON>
|
||||
struct NPairKokkosBuildFunctor {
|
||||
typedef DeviceType device_type;
|
||||
|
||||
const NeighborKokkosExecute<DeviceType> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NPairKokkosBuildFunctor(const NeighborKokkosExecute<DeviceType> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
|
||||
}
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
__device__ inline
|
||||
|
||||
void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
|
||||
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
|
||||
}
|
||||
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
|
||||
#endif
|
||||
};
|
||||
|
||||
template<int HALF_NEIGH,int GHOST_NEWTON>
|
||||
struct NPairKokkosBuildFunctor<LMPHostType,HALF_NEIGH,GHOST_NEWTON> {
|
||||
typedef LMPHostType device_type;
|
||||
|
||||
const NeighborKokkosExecute<LMPHostType> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NPairKokkosBuildFunctor(const NeighborKokkosExecute<LMPHostType> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
|
||||
}
|
||||
|
||||
void operator() (typename Kokkos::TeamPolicy<LMPHostType>::member_type dev) const {}
|
||||
};
|
||||
|
||||
template<class DeviceType,int HALF_NEIGH>
|
||||
struct NPairKokkosBuildFunctorGhost {
|
||||
typedef DeviceType device_type;
|
||||
|
||||
const NeighborKokkosExecute<DeviceType> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NPairKokkosBuildFunctorGhost(const NeighborKokkosExecute<DeviceType> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_Item_Ghost<HALF_NEIGH>(i);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
@ -90,7 +90,7 @@ void PairBuckCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -309,19 +309,12 @@ void PairBuckCoulCutKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/cut/kk");
|
||||
}
|
||||
|
||||
@ -109,7 +109,7 @@ void PairBuckCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -458,11 +458,9 @@ void PairBuckCoulLongKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk");
|
||||
}
|
||||
|
||||
@ -79,7 +79,7 @@ void PairBuckKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -233,19 +233,12 @@ void PairBuckKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with buck/kk");
|
||||
}
|
||||
|
||||
@ -31,7 +31,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class PairBuckKokkos : public PairBuck {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
PairBuckKokkos(class LAMMPS *);
|
||||
@ -96,17 +96,14 @@ class PairBuckKokkos : public PairBuck {
|
||||
friend class PairComputeFunctor<PairBuckKokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,N2,true>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,FULLCLUSTER,true >;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALF,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALFTHREAD,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,N2,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_fullcluster<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairBuckKokkos>(PairBuckKokkos*);
|
||||
};
|
||||
|
||||
@ -78,7 +78,7 @@ void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
vflag = vflag_in;
|
||||
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -215,11 +215,9 @@ void PairCoulCutKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with coul/cut/kk");
|
||||
}
|
||||
|
||||
@ -85,7 +85,7 @@ void PairCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -257,19 +257,12 @@ void PairCoulDebyeKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with coul/debye/kk");
|
||||
}
|
||||
|
||||
@ -221,11 +221,9 @@ void PairCoulDSFKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with coul/dsf/kk");
|
||||
}
|
||||
|
||||
@ -102,7 +102,7 @@ void PairCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -408,11 +408,9 @@ void PairCoulLongKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk");
|
||||
}
|
||||
|
||||
@ -222,11 +222,9 @@ void PairCoulWolfKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with coul/wolf/kk");
|
||||
}
|
||||
|
||||
@ -286,11 +286,9 @@ void PairEAMAlloyKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/alloy");
|
||||
}
|
||||
|
||||
@ -291,11 +291,9 @@ void PairEAMFSKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/fs");
|
||||
}
|
||||
|
||||
@ -281,11 +281,9 @@ void PairEAMKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk");
|
||||
}
|
||||
|
||||
@ -333,145 +333,6 @@ struct PairComputeFunctor {
|
||||
}
|
||||
};
|
||||
|
||||
// Specialization of PairComputeFunctor for the FULLCLUSTER neighbor flag.
// It stores by-value copies of the pair style and of the neighbor list and is
// invoked with a Kokkos::TeamPolicy member (with or without a reduction value).
// Only a compute_item overload taking NoCoulTag is defined in this specialization.
template <class PairStyle, bool STACKPARAMS, class Specialisation>
|
||||
struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
|
||||
typedef typename PairStyle::device_type device_type ;
|
||||
// reduction value type used by the two-argument operator()
typedef EV_FLOAT value_type;
|
||||
|
||||
// copy of the pair style (made from *c_ptr in the constructor)
PairStyle c;
|
||||
// copy of the neighbor list (made from *list_ptr in the constructor)
NeighListKokkos<device_type> list;
|
||||
|
||||
PairComputeFunctor(PairStyle* c_ptr,
|
||||
NeighListKokkos<device_type>* list_ptr):
|
||||
c(*c_ptr),list(*list_ptr) {};
|
||||
// the destructor calls cleanup_copy()/clean_copy() on the stored copies
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
|
||||
|
||||
// Parsed as (j >> SBBITS) & 3; the result is used below to index c.special_lj.
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
|
||||
return j >> SBBITS & 3;
|
||||
}
|
||||
|
||||
// Computes the force on one atom i and returns the accumulated EV_FLOAT.
// EVFLAG enables the energy/virial branch; NEWTON_PAIR is not referenced in the body.
template<int EVFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item(const typename Kokkos::TeamPolicy<device_type>::member_type& dev,
|
||||
const NeighListKokkos<device_type> &list, const NoCoulTag& ) const {
|
||||
EV_FLOAT ev;
|
||||
// one atom index per team thread
// NOTE(review): i is not compared against the list's atom count here - confirm the launch never produces i >= inum
int i = dev.league_rank()*dev.team_size() + dev.team_rank();
|
||||
|
||||
const X_FLOAT xtmp = c.c_x(i,0);
|
||||
const X_FLOAT ytmp = c.c_x(i,1);
|
||||
const X_FLOAT ztmp = c.c_x(i,2);
|
||||
int itype = c.type(i);
|
||||
|
||||
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
||||
const int jnum = list.d_numneigh[i];
|
||||
|
||||
// force accumulator for atom i; it is the reduction target of the vector-range loop below
F_FLOAT3 ftmp;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int jjj = neighbors_i(jj);
|
||||
|
||||
// each stored entry jjj is expanded to the NeighClusterSize indices jjj+k, k = 0..NeighClusterSize-1
// NOTE(review): ev is captured by reference and updated inside this lambda - confirm this is safe across vector lanes
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(dev,NeighClusterSize),[&] (const int& k, F_FLOAT3& fftmp) {
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(jjj+k)];
|
||||
const int j = (jjj + k)&NEIGHMASK;
|
||||
// skip the atom itself and any index at or beyond c.nall
if((j==i)||(j>=c.nall)) return;
|
||||
const X_FLOAT delx = xtmp - c.c_x(j,0);
|
||||
const X_FLOAT dely = ytmp - c.c_x(j,1);
|
||||
const X_FLOAT delz = ztmp - c.c_x(j,2);
|
||||
const int jtype = c.type(j);
|
||||
const F_FLOAT rsq = (delx*delx + dely*dely + delz*delz);
|
||||
|
||||
// cutoff comes from the m_cutsq member array when STACKPARAMS is true, otherwise from d_cutsq
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
||||
|
||||
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
fftmp.x += delx*fpair;
|
||||
fftmp.y += dely*fpair;
|
||||
fftmp.z += delz*fpair;
|
||||
|
||||
if (EVFLAG) {
|
||||
F_FLOAT evdwl = 0.0;
|
||||
if (c.eflag) {
|
||||
// only half of the pair energy is added per visit
// presumably because a full list visits each pair from both atoms - confirm
evdwl = 0.5*
|
||||
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
ev.evdwl += evdwl;
|
||||
}
|
||||
|
||||
if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
},ftmp);
|
||||
}
|
||||
|
||||
// add the accumulated force to f(i) inside a Kokkos::single(PerThread) region
Kokkos::single(Kokkos::PerThread(dev), [&]() {
|
||||
c.f(i,0) += ftmp.x;
|
||||
c.f(i,1) += ftmp.y;
|
||||
c.f(i,2) += ftmp.z;
|
||||
});
|
||||
|
||||
return ev;
|
||||
}
|
||||
|
||||
// Tallies per-atom energy and global/per-atom virial contributions for the pair (i,j).
// epair is the energy passed by the caller, fpair the force factor, del* the separation components.
KOKKOS_INLINE_FUNCTION
|
||||
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const
|
||||
{
|
||||
const int EFLAG = c.eflag;
|
||||
const int NEWTON_PAIR = c.newton_pair;
|
||||
const int VFLAG = c.vflag_either;
|
||||
|
||||
if (EFLAG) {
|
||||
if (c.eflag_atom) {
|
||||
// half of epair goes to each of i and j, each gated by NEWTON_PAIR or index < nlocal
const E_FLOAT epairhalf = 0.5 * epair;
|
||||
if (NEWTON_PAIR || i < c.nlocal) c.d_eatom[i] += epairhalf;
|
||||
if (NEWTON_PAIR || j < c.nlocal) c.d_eatom[j] += epairhalf;
|
||||
}
|
||||
}
|
||||
|
||||
if (VFLAG) {
|
||||
// six virial components in the order xx, yy, zz, xy, xz, yz
const E_FLOAT v0 = delx*delx*fpair;
|
||||
const E_FLOAT v1 = dely*dely*fpair;
|
||||
const E_FLOAT v2 = delz*delz*fpair;
|
||||
const E_FLOAT v3 = delx*dely*fpair;
|
||||
const E_FLOAT v4 = delx*delz*fpair;
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
if (c.vflag_global) {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
|
||||
if (c.vflag_atom) {
|
||||
// the per-atom virial is written for atom i only, and only when i < nlocal
if (i < c.nlocal) {
|
||||
c.d_vatom(i,0) += 0.5*v0;
|
||||
c.d_vatom(i,1) += 0.5*v1;
|
||||
c.d_vatom(i,2) += 0.5*v2;
|
||||
c.d_vatom(i,3) += 0.5*v3;
|
||||
c.d_vatom(i,4) += 0.5*v4;
|
||||
c.d_vatom(i,5) += 0.5*v5;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Entry point without a reduction value: runs compute_item with EVFLAG=0 and discards its result.
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev) const {
|
||||
if (c.newton_pair) compute_item<0,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
else compute_item<0,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
|
||||
// Entry point with a reduction value: runs compute_item with EVFLAG=1 and adds its result to energy_virial.
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev, value_type &energy_virial) const {
|
||||
if (c.newton_pair)
|
||||
energy_virial += compute_item<1,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
else
|
||||
energy_virial += compute_item<1,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
};
|
||||
|
||||
template <class PairStyle, bool STACKPARAMS, class Specialisation>
|
||||
struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
|
||||
typedef typename PairStyle::device_type device_type ;
|
||||
@ -607,8 +468,8 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
|
||||
// The enable_if clause will invalidate the last parameter of the function, so that
|
||||
// a match is only achieved, if PairStyle supports the specific neighborlist variant.
|
||||
// This uses the fact that failure to match template parameters is not an error.
|
||||
// By having the enable_if with a ! and without it, exactly one of the two versions of the functions
|
||||
// pair_compute_neighlist and pair_compute_fullcluster will match - either the dummy version
|
||||
// By having the enable_if with a ! and without it, exactly one of the functions
|
||||
// pair_compute_neighlist will match - either the dummy version
|
||||
// or the real one further below.
|
||||
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
|
||||
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<!((NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
@ -619,15 +480,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
|
||||
return ev;
|
||||
}
|
||||
|
||||
template<class PairStyle, class Specialisation>
|
||||
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<!((FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
EV_FLOAT ev;
|
||||
(void) fpair;
|
||||
(void) list;
|
||||
printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i \n",FULLCLUSTER,PairStyle::EnabledNeighFlags);
|
||||
return ev;
|
||||
}
|
||||
|
||||
// Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2
|
||||
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
|
||||
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
@ -644,41 +496,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
|
||||
return ev;
|
||||
}
|
||||
|
||||
// Submit ParallelFor for NEIGHFLAG=FULLCLUSTER
|
||||
template<class PairStyle, class Specialisation>
|
||||
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<(FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
EV_FLOAT ev;
|
||||
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
|
||||
f_type;
|
||||
f_type ff(fpair, list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*+teamsize-1)/teamsize;
|
||||
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
|
||||
else Kokkos::parallel_for(config,ff);
|
||||
} else {
|
||||
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
|
||||
f_type;
|
||||
f_type ff(fpair, list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*+teamsize-1)/teamsize;
|
||||
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
|
||||
else Kokkos::parallel_for(config,ff);
|
||||
}
|
||||
return ev;
|
||||
}
|
||||
|
||||
|
||||
template<class PairStyle, class Specialisation>
|
||||
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
|
||||
EV_FLOAT ev;
|
||||
@ -690,8 +507,6 @@ EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::dev
|
||||
ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list);
|
||||
} else if (fpair->neighflag == N2) {
|
||||
ev = pair_compute_neighlist<PairStyle,N2,Specialisation> (fpair,list);
|
||||
} else if (fpair->neighflag == FULLCLUSTER) {
|
||||
ev = pair_compute_fullcluster<PairStyle,Specialisation> (fpair,list);
|
||||
}
|
||||
return ev;
|
||||
}
|
||||
|
||||
@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::compute(int eflag_in, int
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -455,11 +455,9 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/implicit/kk");
|
||||
}
|
||||
|
||||
@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_i
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -456,11 +456,9 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/kk");
|
||||
}
|
||||
|
||||
@ -110,7 +110,7 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -486,11 +486,9 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/long/kk");
|
||||
}
|
||||
|
||||
@ -87,7 +87,7 @@ void PairLJClass2CoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -289,19 +289,12 @@ void PairLJClass2CoulCutKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/cut/kk");
|
||||
}
|
||||
|
||||
@ -95,7 +95,7 @@ void PairLJClass2CoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -445,11 +445,9 @@ void PairLJClass2CoulLongKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/long/kk");
|
||||
}
|
||||
|
||||
@ -87,7 +87,7 @@ void PairLJClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
vflag = vflag_in;
|
||||
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -227,19 +227,12 @@ void PairLJClass2Kokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/kk");
|
||||
}
|
||||
|
||||
@ -31,7 +31,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class PairLJClass2Kokkos : public PairLJClass2 {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
PairLJClass2Kokkos(class LAMMPS *);
|
||||
@ -99,17 +99,14 @@ class PairLJClass2Kokkos : public PairLJClass2 {
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,N2,true>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,FULLCLUSTER,true >;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALF,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALFTHREAD,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,N2,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_fullcluster<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairLJClass2Kokkos>(PairLJClass2Kokkos*);
|
||||
};
|
||||
|
||||
@ -87,7 +87,7 @@ void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -280,19 +280,12 @@ void PairLJCutCoulCutKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
|
||||
}
|
||||
|
||||
@ -91,7 +91,7 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -310,19 +310,12 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/debye/kk");
|
||||
}
|
||||
|
||||
@ -99,7 +99,7 @@ void PairLJCutCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -301,19 +301,12 @@ void PairLJCutCoulDSFKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
|
||||
}
|
||||
|
||||
@ -99,7 +99,7 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -464,11 +464,9 @@ void PairLJCutCoulLongKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/long/kk");
|
||||
}
|
||||
|
||||
@ -87,7 +87,7 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
vflag = vflag_in;
|
||||
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -245,19 +245,12 @@ void PairLJCutKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
|
||||
}
|
||||
|
||||
@ -31,7 +31,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class PairLJCutKokkos : public PairLJCut {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
PairLJCutKokkos(class LAMMPS *);
|
||||
@ -99,17 +99,14 @@ class PairLJCutKokkos : public PairLJCut {
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,N2,true>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,true >;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,N2,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_fullcluster<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairLJCutKokkos>(PairLJCutKokkos*);
|
||||
};
|
||||
|
||||
@ -86,7 +86,7 @@ void PairLJExpandKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -230,19 +230,12 @@ void PairLJExpandKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/expand/kk");
|
||||
}
|
||||
|
||||
@ -31,7 +31,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class PairLJExpandKokkos : public PairLJExpand {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
PairLJExpandKokkos(class LAMMPS *);
|
||||
@ -100,17 +100,14 @@ class PairLJExpandKokkos : public PairLJExpand {
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,N2,true>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,FULLCLUSTER,true >;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,FULL,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALF,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALFTHREAD,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,N2,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_fullcluster<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairLJExpandKokkos>(PairLJExpandKokkos*);
|
||||
};
|
||||
|
||||
@ -101,7 +101,7 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -439,11 +439,9 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/coul/gromacs/kk");
|
||||
}
|
||||
|
||||
@ -98,7 +98,7 @@ void PairLJGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -277,11 +277,9 @@ void PairLJGromacsKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/kk");
|
||||
}
|
||||
|
||||
@ -86,7 +86,7 @@ void PairLJSDKKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
vflag = vflag_in;
|
||||
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -258,19 +258,12 @@ void PairLJSDKKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/sdk/kk");
|
||||
}
|
||||
|
||||
@ -31,7 +31,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class PairLJSDKKokkos : public PairLJSDK {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
PairLJSDKKokkos(class LAMMPS *);
|
||||
@ -97,17 +97,14 @@ class PairLJSDKKokkos : public PairLJSDK {
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,N2,true>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,FULLCLUSTER,true >;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,FULL,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,HALF,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,HALFTHREAD,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,N2,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_fullcluster<PairLJSDKKokkos,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairLJSDKKokkos,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairLJSDKKokkos>(PairLJSDKKokkos*);
|
||||
};
|
||||
|
||||
@ -146,12 +146,10 @@ void PairReaxCKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk");
|
||||
|
||||
@ -601,7 +601,6 @@ void PairSWKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
if (neighflag == FULL)
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
else
|
||||
|
||||
@ -96,7 +96,7 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -142,19 +142,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
|
||||
f(this,(NeighListKokkos<DeviceType>*) list);
|
||||
if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
|
||||
else Kokkos::parallel_for(nlocal,f);
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,false,S_TableCompute<DeviceType,TABSTYLE> >
|
||||
f_type;
|
||||
f_type f(this,(NeighListKokkos<DeviceType>*) list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*+teamsize-1)/teamsize;
|
||||
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
|
||||
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
|
||||
else Kokkos::parallel_for(config,f);
|
||||
}
|
||||
} else {
|
||||
if (neighflag == FULL) {
|
||||
@ -177,19 +164,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
|
||||
f(this,(NeighListKokkos<DeviceType>*) list);
|
||||
if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
|
||||
else Kokkos::parallel_for(nlocal,f);
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,true,S_TableCompute<DeviceType,TABSTYLE> >
|
||||
f_type;
|
||||
f_type f(this,(NeighListKokkos<DeviceType>*) list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*+teamsize-1)/teamsize;
|
||||
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
|
||||
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
|
||||
else Kokkos::parallel_for(config,f);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1261,19 +1235,12 @@ void PairTableKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
|
||||
}
|
||||
|
||||
@ -41,7 +41,7 @@ template<class DeviceType>
|
||||
class PairTableKokkos : public Pair {
|
||||
public:
|
||||
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
|
||||
@ -170,45 +170,37 @@ class PairTableKokkos : public Pair {
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
|
||||
friend void pair_virial_fdotr_compute<PairTableKokkos>(PairTableKokkos*);
|
||||
};
|
||||
|
||||
@ -103,7 +103,6 @@ void PairTersoffKokkos<DeviceType>::init_style()
|
||||
//if (neighflag == FULL || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
if (neighflag == FULL)
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
else
|
||||
|
||||
@ -102,7 +102,6 @@ void PairTersoffMODKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
if (neighflag == FULL)
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
else
|
||||
|
||||
@ -113,7 +113,6 @@ void PairTersoffZBLKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
if (neighflag == FULL)
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
else
|
||||
|
||||
11
src/Make.sh
11
src/Make.sh
@ -59,8 +59,9 @@ style () {
|
||||
# called by "make machine"
|
||||
# col 1 = string to search for
|
||||
# col 2 = search in *.h files starting with this name
|
||||
# col 3 = prefix of style file
|
||||
# col 4
|
||||
# col 3 = name of style file
|
||||
# col 4 = file that includes the style file
|
||||
# col 5 = optional 2nd file that includes the style file
|
||||
|
||||
if (test $1 = "style") then
|
||||
|
||||
@ -69,7 +70,7 @@ if (test $1 = "style") then
|
||||
style BODY_CLASS body_ body atom_vec_body
|
||||
style BOND_CLASS bond_ bond force
|
||||
style COMMAND_CLASS "" command input
|
||||
style COMPUTE_CLASS compute_ compute modify modify_cuda
|
||||
style COMPUTE_CLASS compute_ compute modify
|
||||
style DIHEDRAL_CLASS dihedral_ dihedral force
|
||||
style DUMP_CLASS dump_ dump output write_dump
|
||||
style FIX_CLASS fix_ fix modify
|
||||
@ -77,6 +78,10 @@ if (test $1 = "style") then
|
||||
style INTEGRATE_CLASS "" integrate update
|
||||
style KSPACE_CLASS "" kspace force
|
||||
style MINIMIZE_CLASS min_ minimize update
|
||||
style NBIN_CLASS nbin_ nbin neighbor
|
||||
style NPAIR_CLASS npair_ npair neighbor
|
||||
style NSTENCIL_CLASS nstencil_ nstencil neighbor
|
||||
style NTOPO_CLASS ntopo_ ntopo neighbor
|
||||
style PAIR_CLASS pair_ pair force
|
||||
style READER_CLASS reader_ reader read_dump
|
||||
style REGION_CLASS region_ region domain
|
||||
|
||||
@ -13,6 +13,40 @@ style_kspace.h
|
||||
style_minimize.h
|
||||
style_pair.h
|
||||
style_region.h
|
||||
style_neigh_bin.h
|
||||
style_neigh_pair.h
|
||||
style_neigh_stencil.h
|
||||
# deleted on ## XXX 2016
|
||||
accelerator_intel.h
|
||||
neigh_bond.cpp
|
||||
neigh_bond.h
|
||||
neigh_derive.cpp
|
||||
neigh_derive.h
|
||||
neigh_full.cpp
|
||||
neigh_full.h
|
||||
neigh_gran.cpp
|
||||
neigh_gran.h
|
||||
neigh_half_bin.cpp
|
||||
neigh_half_bin.h
|
||||
neigh_half_multi.cpp
|
||||
neigh_half_multi.h
|
||||
neigh_half_nsq.cpp
|
||||
neigh_half_nsq.h
|
||||
neigh_respa.cpp
|
||||
neigh_respa.h
|
||||
neigh_shardlow.cpp
|
||||
neigh_shardlow.h
|
||||
neigh_stencil.cpp
|
||||
neigh_half_bin_intel.cpp
|
||||
neigh_full_kokkos.h
|
||||
neighbor_omp.h
|
||||
neigh_derive_omp.cpp
|
||||
neigh_full_omp.cpp
|
||||
neigh_gran_omp.cpp
|
||||
neigh_half_bin_omp.cpp
|
||||
neigh_half_multi_omp.cpp
|
||||
neigh_half_nsq_omp.cpp
|
||||
neigh_respa_omp.cpp
|
||||
# deleted on 20 Sep 2016
|
||||
fix_ti_rs.cpp
|
||||
fix_ti_rs.h
|
||||
|
||||
@ -47,6 +47,7 @@
|
||||
#include "comm.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "random_mars.h"
|
||||
#include "memory.h"
|
||||
#include "domain.h"
|
||||
@ -139,6 +140,23 @@ int FixShardlow::setmask()
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Registers a neighbor list request for this fix and configures its flags:
// the pair flag is cleared, and the fix and ssa flags are set.
// NOTE(review): presumably this makes the neighbor code build a dedicated
// SSA list for the fix instead of reusing the pair style's list -- confirm
// against the NeighRequest flag semantics.
void FixShardlow::init()
|
||||
{
|
||||
// index of the newly created request in neighbor->requests
int irequest = neighbor->request(this,instance_me);
|
||||
neighbor->requests[irequest]->pair = 0;
|
||||
neighbor->requests[irequest]->fix = 1;
|
||||
neighbor->requests[irequest]->ssa = 1;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Stores the neighbor list pointer handed in by the caller in the member
// `list`. The `id` argument is not used here.
void FixShardlow::init_list(int id, NeighList *ptr)
|
||||
{
|
||||
list = ptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixShardlow::pre_exchange()
|
||||
{
|
||||
memset(atom->ssaAIR, 0, sizeof(int)*atom->nlocal);
|
||||
@ -217,7 +235,6 @@ void FixShardlow::ssa_update(
|
||||
int newton_pair = force->newton_pair;
|
||||
double randPair;
|
||||
|
||||
int *ssaAIR = atom->ssaAIR;
|
||||
double *uCond = atom->uCond;
|
||||
double *uMech = atom->uMech;
|
||||
double *dpdTheta = atom->dpdTheta;
|
||||
@ -411,7 +428,6 @@ void FixShardlow::initial_integrate(int vflag)
|
||||
int nghost = atom->nghost;
|
||||
|
||||
int airnum;
|
||||
class NeighList *list; // points to list in pairDPD or pairDPDE
|
||||
class RanMars *pRNG;
|
||||
|
||||
// NOTE: this logic is specific to orthogonal boxes, not triclinic
|
||||
@ -432,12 +448,10 @@ void FixShardlow::initial_integrate(int vflag)
|
||||
// Allocate memory for v_t0 to hold the initial velocities for the ghosts
|
||||
v_t0 = (double (*)[3]) memory->smalloc(sizeof(double)*3*nghost, "FixShardlow:v_t0");
|
||||
|
||||
// Define pointers to access the neighbor list and RNG
|
||||
// Define pointers to access the RNG
|
||||
if(pairDPDE){
|
||||
list = pairDPDE->list;
|
||||
pRNG = pairDPDE->random;
|
||||
} else {
|
||||
list = pairDPD->list;
|
||||
pRNG = pairDPD->random;
|
||||
}
|
||||
inum = list->inum;
|
||||
|
||||
@ -26,9 +26,13 @@ namespace LAMMPS_NS {
|
||||
|
||||
class FixShardlow : public Fix {
|
||||
public:
|
||||
class NeighList *list; // The SSA specific neighbor list
|
||||
|
||||
FixShardlow(class LAMMPS *, int, char **);
|
||||
~FixShardlow();
|
||||
int setmask();
|
||||
virtual void init();
|
||||
virtual void init_list(int, class NeighList *);
|
||||
virtual void setup(int);
|
||||
virtual void initial_integrate(int);
|
||||
void setup_pre_exchange();
|
||||
|
||||
129
src/USER-DPD/nbin_ssa.cpp
Normal file
129
src/USER-DPD/nbin_ssa.cpp
Normal file
@ -0,0 +1,129 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors:
|
||||
James Larentzos (ARL) and Timothy I. Mattox (Engility Corporation)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "nbin_ssa.h"
|
||||
#include "atom.h"
|
||||
#include "group.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct with empty SSA bin storage: all array pointers are NULL and
// both recorded capacities are zero until bin_atoms_setup() sizes them.
NBinSSA::NBinSSA(LAMMPS *lmp) :
  NBinStandard(lmp),
  bins_ssa(NULL), maxbin_ssa(0),
  binhead_ssa(NULL), gbinhead_ssa(NULL), maxhead_ssa(0)
{
}
|
||||
|
||||
// Hand the three SSA-specific arrays back to the LAMMPS memory manager.
NBinSSA::~NBinSSA()
{
  memory->destroy(gbinhead_ssa);   // per-bin head of the ghost atom lists
  memory->destroy(binhead_ssa);    // per-bin head of the local atom lists
  memory->destroy(bins_ssa);       // per-atom "next atom in bin" links
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
bin owned and ghost atoms for the Shardlow Splitting Algorithm (SSA)
|
||||
local atoms are in distinct bins (binhead_ssa) from the ghosts
|
||||
ghost atoms are in distinct bins (gbinhead_ssa) from the locals
|
||||
ghosts which are not in an Active Interaction Region (AIR) are skipped
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NBinSSA::bin_atoms()
|
||||
{
|
||||
int i,ibin;
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
if (includegroup) nlocal = atom->nfirst;
|
||||
double **x = atom->x;
|
||||
int *mask = atom->mask;
|
||||
int *ssaAIR = atom->ssaAIR;
|
||||
|
||||
for (i = 0; i < mbins; i++) {
|
||||
gbinhead_ssa[i] = -1;
|
||||
binhead_ssa[i] = -1;
|
||||
}
|
||||
|
||||
// bin in reverse order so linked list will be in forward order
|
||||
|
||||
if (includegroup) {
|
||||
int bitmask = group->bitmask[includegroup];
|
||||
int nowned = atom->nlocal; // NOTE: nlocal was set to atom->nfirst above
|
||||
for (i = nall-1; i >= nowned; i--) {
|
||||
if (ssaAIR[i] < 2) continue; // skip ghost atoms not in AIR
|
||||
if (mask[i] & bitmask) {
|
||||
ibin = coord2bin(x[i]);
|
||||
bins_ssa[i] = gbinhead_ssa[ibin];
|
||||
gbinhead_ssa[ibin] = i;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (i = nall-1; i >= nlocal; i--) {
|
||||
if (ssaAIR[i] < 2) continue; // skip ghost atoms not in AIR
|
||||
ibin = coord2bin(x[i]);
|
||||
bins_ssa[i] = gbinhead_ssa[ibin];
|
||||
gbinhead_ssa[ibin] = i;
|
||||
}
|
||||
}
|
||||
for (i = nlocal-1; i >= 0; i--) {
|
||||
ibin = coord2bin(x[i]);
|
||||
bins_ssa[i] = binhead_ssa[ibin];
|
||||
binhead_ssa[ibin] = i;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NBinSSA::bin_atoms_setup(int nall)
|
||||
{
|
||||
NBinStandard::bin_atoms_setup(nall); // Setup the parent class's data too
|
||||
|
||||
if (mbins > maxhead_ssa) {
|
||||
maxhead_ssa = mbins;
|
||||
memory->destroy(gbinhead_ssa);
|
||||
memory->destroy(binhead_ssa);
|
||||
memory->create(binhead_ssa,maxhead_ssa,"binhead_ssa");
|
||||
memory->create(gbinhead_ssa,maxhead_ssa,"gbinhead_ssa");
|
||||
}
|
||||
|
||||
if (nall > maxbin_ssa) {
|
||||
maxbin_ssa = nall;
|
||||
memory->destroy(bins_ssa);
|
||||
memory->create(bins_ssa,maxbin_ssa,"bins_ssa");
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Report the bytes used by the parent class plus the SSA-specific arrays.
// Arrays whose recorded capacity is zero contribute nothing.
bigint NBinSSA::memory_usage()
{
  bigint total = NBinStandard::memory_usage();

  if (maxhead_ssa) {
    total += memory->usage(binhead_ssa,maxhead_ssa);
    total += memory->usage(gbinhead_ssa,maxhead_ssa);
  }
  if (maxbin_ssa) total += memory->usage(bins_ssa,maxbin_ssa);

  return total;
}
|
||||
|
||||
54
src/USER-DPD/nbin_ssa.h
Normal file
54
src/USER-DPD/nbin_ssa.h
Normal file
@ -0,0 +1,54 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NBIN_CLASS
|
||||
|
||||
NBinStyle(ssa,
|
||||
NBinSSA,
|
||||
NB_SSA)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NBIN_SSA_H
|
||||
#define LMP_NBIN_SSA_H
|
||||
|
||||
#include "nbin_standard.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Binning class for the Shardlow Splitting Algorithm (SSA).
// Keeps owned atoms and ghost atoms in separate per-bin linked lists:
// binhead_ssa[] / gbinhead_ssa[] hold the first atom of each list and
// bins_ssa[] chains the atoms of one list together.
class NBinSSA : public NBinStandard {
|
||||
public:
|
||||
|
||||
int *bins_ssa; // per-atom link: index of the next atom in the same bin list, -1 at the end
|
||||
int maxbin_ssa; // allocated length of bins_ssa
|
||||
int *binhead_ssa; // index of 1st local atom in each bin, -1 if the bin has none
|
||||
int *gbinhead_ssa; // index of 1st ghost atom in each bin, -1 if the bin has none
|
||||
int maxhead_ssa; // allocated length of binhead_ssa and gbinhead_ssa
|
||||
|
||||
NBinSSA(class LAMMPS *);
|
||||
~NBinSSA();
|
||||
|
||||
// grow the SSA arrays (and the parent's arrays) for the given atom count
void bin_atoms_setup(int);
|
||||
// rebuild the local and ghost bin lists from the current coordinates
void bin_atoms();
|
||||
|
||||
// bytes used by the parent class plus the SSA arrays
bigint memory_usage();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
260
src/USER-DPD/npair_half_bin_newton_ssa.cpp
Normal file
260
src/USER-DPD/npair_half_bin_newton_ssa.cpp
Normal file
@ -0,0 +1,260 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors:
|
||||
James Larentzos and Timothy I. Mattox (Engility Corporation)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_half_bin_newton_ssa.h"
|
||||
#include "neighbor.h"
|
||||
#include "nstencil_ssa.h"
|
||||
#include "nbin_ssa.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "domain.h"
|
||||
#include "group.h"
|
||||
#include "memory.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
// file-scope pointer to atom->ssaAIR, read by the qsort() comparator
|
||||
// prototype for non-class function
|
||||
|
||||
static int *ssaAIRptr;
|
||||
static int cmp_ssaAIR(const void *, const void *);
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Constructor: forwards the LAMMPS instance pointer to the NPair base class; no other state is set up here.
NPairHalfBinNewtonSSA::NPairHalfBinNewtonSSA(LAMMPS *lmp) : NPair(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with full Newton's 3rd law
|
||||
for use by Shardlow Splitting Algorithm
|
||||
each owned atom i checks its own bin and other bins in Newton stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// Build the SSA half neighbor list for every owned atom.
//
// The neighbors of atom i are stored in this order:
//   1. local atoms that follow i in i's own bin list,
//   2. local atoms in the bins of the "half" stencil,
//   3. (only if ssaAIR[i] == 1) AIR ghost atoms from the "full" stencil,
//      afterwards sorted by their ssaAIR value.
// list->ndxAIR_ssa[i][] receives the running totals of the per-group
// counts: entry 0 is the number of local neighbors, entry m (m >= 1) is
// the end index of the ghosts whose ssaAIR value is m+1.
//
// Requires that the stencil and bin objects attached to this NPair are
// NStencilSSA / NBinSSA instances; otherwise error->one() is called.
void NPairHalfBinNewtonSSA::build(NeighList *list)
{
  int i,j,k,n,itype,jtype,ibin,which;
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  int *neighptr;

  // only meaningful for template molecules (moltemplate != 0); given
  // defined values so they are never read while indeterminate
  int imol = -1;
  int iatom = 0;
  tagint tagprev = 0;

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;
  int nlocal = atom->nlocal;
  if (includegroup) nlocal = atom->nfirst;
  int *ssaAIR = atom->ssaAIR;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;
  int molecular = atom->molecular;
  int moltemplate = (molecular == 2) ? 1 : 0;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  MyPage<int> *ipage = list->ipage;

  NStencilSSA *ns_ssa = dynamic_cast<NStencilSSA*>(ns);
  if (!ns_ssa) error->one(FLERR, "NStencil wasn't a NStencilSSA object");
  int nstencil_half = ns_ssa->nstencil_half;
  int nstencil_full = ns_ssa->nstencil;

  NBinSSA *nb_ssa = dynamic_cast<NBinSSA*>(nb);
  if (!nb_ssa) error->one(FLERR, "NBin wasn't a NBinSSA object");
  int *bins_ssa = nb_ssa->bins_ssa;
  int *binhead_ssa = nb_ssa->binhead_ssa;
  int *gbinhead_ssa = nb_ssa->gbinhead_ssa;

  // the qsort() comparator reads the AIR values through this file-scope
  // pointer; it does not change during the build, so set it once
  ssaAIRptr = ssaAIR;

  int inum = 0;

  ipage->reset();

  // loop over owned atoms, storing half of the neighbors

  for (i = 0; i < nlocal; i++) {
    int AIRct[8] = { 0 };
    n = 0;
    neighptr = ipage->vget();

    itype = type[i];
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
    if (moltemplate) {
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // loop over rest of local atoms in i's bin
    // just store them, since j is beyond i in linked list

    for (j = bins_ssa[i]; j >= 0; j = bins_ssa[j]) {

      jtype = type[j];
      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
      rsq = delx*delx + dely*dely + delz*delz;

      if (rsq <= cutneighsq[itype][jtype]) {
        if (molecular) {
          if (!moltemplate)
            which = find_special(special[i],nspecial[i],tag[j]);
          else if (imol >= 0)
            which = find_special(onemols[imol]->special[iatom],
                                 onemols[imol]->nspecial[iatom],
                                 tag[j]-tagprev);
          else which = 0;
          if (which == 0) neighptr[n++] = j;
          else if (domain->minimum_image_check(delx,dely,delz))
            neighptr[n++] = j;
          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
        } else neighptr[n++] = j;
      }
    }

    ibin = coord2bin(x[i]);

    // loop over all local atoms in other bins in "half" stencil

    for (k = 0; k < nstencil_half; k++) {
      for (j = binhead_ssa[ibin+stencil[k]]; j >= 0;
           j = bins_ssa[j]) {

        jtype = type[j];
        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

        delx = xtmp - x[j][0];
        dely = ytmp - x[j][1];
        delz = ztmp - x[j][2];
        rsq = delx*delx + dely*dely + delz*delz;

        if (rsq <= cutneighsq[itype][jtype]) {
          if (molecular) {
            if (!moltemplate)
              which = find_special(special[i],nspecial[i],tag[j]);
            else if (imol >= 0)
              which = find_special(onemols[imol]->special[iatom],
                                   onemols[imol]->nspecial[iatom],
                                   tag[j]-tagprev);
            else which = 0;
            if (which == 0) neighptr[n++] = j;
            else if (domain->minimum_image_check(delx,dely,delz))
              neighptr[n++] = j;
            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
          } else neighptr[n++] = j;
        }
      }
    }

    // everything stored so far is a local atom
    AIRct[0] = n;

    // loop over AIR ghost atoms in all bins in "full" stencil
    // Note: the non-AIR ghost atoms have already been filtered out
    // That is a significant time savings because of the "full" stencil
    // Note2: only non-pure locals can have ghosts as neighbors

    if (ssaAIR[i] == 1) for (k = 0; k < nstencil_full; k++) {
      for (j = gbinhead_ssa[ibin+stencil[k]]; j >= 0;
           j = bins_ssa[j]) {

        jtype = type[j];
        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

        delx = xtmp - x[j][0];
        dely = ytmp - x[j][1];
        delz = ztmp - x[j][2];
        rsq = delx*delx + dely*dely + delz*delz;

        if (rsq <= cutneighsq[itype][jtype]) {
          if (molecular) {
            if (!moltemplate)
              which = find_special(special[i],nspecial[i],tag[j]);
            else if (imol >= 0)
              which = find_special(onemols[imol]->special[iatom],
                                   onemols[imol]->nspecial[iatom],
                                   tag[j]-tagprev);
            else which = 0;
            if (which == 0) {
              neighptr[n++] = j;
              ++(AIRct[ssaAIR[j] - 1]);
            } else if (domain->minimum_image_check(delx,dely,delz)) {
              neighptr[n++] = j;
              ++(AIRct[ssaAIR[j] - 1]);
            } else if (which > 0) {
              neighptr[n++] = j ^ (which << SBBITS);
              ++(AIRct[ssaAIR[j] - 1]);
            }
          } else {
            neighptr[n++] = j;
            ++(AIRct[ssaAIR[j] - 1]);
          }
        }
      }
    }

    ilist[inum++] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage->vgot(n);
    if (ipage->status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");

    // sort the ghosts in the neighbor list by their ssaAIR number
    // NOTE(review): the entries sorted here may carry special-bond bits
    // (see "j ^ (which << SBBITS)" above), so the comparator has to strip
    // them before using an entry as an atom index

    qsort(&(neighptr[AIRct[0]]), n - AIRct[0], sizeof(int), cmp_ssaAIR);

    // do a prefix sum on the counts to turn them into indexes

    list->ndxAIR_ssa[i][0] = AIRct[0];
    for (int ndx = 1; ndx < 8; ++ndx) {
      list->ndxAIR_ssa[i][ndx] = AIRct[ndx] + list->ndxAIR_ssa[i][ndx - 1];
    }
  }

  list->inum = inum;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
comparison function invoked by qsort()
|
||||
accesses static class member ssaAIRptr, set before call to qsort()
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static int cmp_ssaAIR(const void *iptr, const void *jptr)
|
||||
{
|
||||
int i = *((int *) iptr);
|
||||
int j = *((int *) jptr);
|
||||
if (ssaAIRptr[i] < ssaAIRptr[j]) return -1;
|
||||
if (ssaAIRptr[i] > ssaAIRptr[j]) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
43
src/USER-DPD/npair_half_bin_newton_ssa.h
Normal file
43
src/USER-DPD/npair_half_bin_newton_ssa.h
Normal file
@ -0,0 +1,43 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
NPairStyle(half/bin/newton/ssa,
|
||||
NPairHalfBinNewtonSSA,
|
||||
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_SSA)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALF_BIN_NEWTON_SSA_H
|
||||
#define LMP_NPAIR_HALF_BIN_NEWTON_SSA_H
|
||||
|
||||
#include "npair.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Neighbor-pair builder registered under the style name
// half/bin/newton/ssa; its build() fills a half neighbor list laid out
// for the Shardlow Splitting Algorithm.
class NPairHalfBinNewtonSSA : public NPair {
|
||||
public:
|
||||
NPairHalfBinNewtonSSA(class LAMMPS *);
|
||||
~NPairHalfBinNewtonSSA() {}
|
||||
// construct the neighbor list passed in
void build(class NeighList *);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
132
src/USER-DPD/npair_halffull_newton_ssa.cpp
Normal file
132
src/USER-DPD/npair_halffull_newton_ssa.cpp
Normal file
@ -0,0 +1,132 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors:
|
||||
James Larentzos and Timothy I. Mattox (Engility Corporation)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_halffull_newton_ssa.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "domain.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
// file-scope pointer to atom->ssaAIR, read by the qsort() comparator
|
||||
// prototype for non-class function
|
||||
|
||||
static int *ssaAIRptr;
|
||||
static int cmp_ssaAIR(const void *, const void *);
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Constructor: forwards the LAMMPS instance pointer to the NPair base class; no other state is set up here.
NPairHalffullNewtonSSA::NPairHalffullNewtonSSA(LAMMPS *lmp) : NPair(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
build half list from full list for use by Shardlow Splitting Algorithm
|
||||
pair stored once if i,j are both owned and i < j
|
||||
if j is ghost, only store if j coords are "above and to the right" of i
|
||||
works if full list is a skip list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// Derive the SSA half list from the parent full list (list->listfull).
// For every atom i of the full list:
//   - an owned neighbor j is kept unless i > j,
//   - a ghost neighbor j is kept only if ssaAIR[j] >= 2,
//   - kept entries are copied unmodified (including any upper flag bits),
//   - the kept entries are then sorted by ssaAIR value and the running
//     totals of the per-group counts are written to list->ndxAIR_ssa[i].
void NPairHalffullNewtonSSA::build(NeighList *list)
{
  const int nowned = atom->nlocal;
  int *airOf = atom->ssaAIR;

  // destination (half) list
  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  MyPage<int> *pages = list->ipage;

  // source (full) list
  int *ilist_full = list->listfull->ilist;
  int *numneigh_full = list->listfull->numneigh;
  int **firstneigh_full = list->listfull->firstneigh;
  const int nrows_full = list->listfull->inum;

  int nrows = 0;
  pages->reset();

  // loop over parent full list

  for (int row = 0; row < nrows_full; row++) {
    int counts[8] = { 0 };
    int kept = 0;
    int *out = pages->vget();

    const int i = ilist_full[row];

    // loop over full neighbor list

    const int *src = firstneigh_full[i];
    const int nsrc = numneigh_full[i];

    for (int m = 0; m < nsrc; m++) {
      const int raw = src[m];
      const int j = raw & NEIGHMASK;

      int slot;
      if (j < nowned) {
        if (i > j) continue;
        slot = 0;
      } else {
        if (airOf[j] < 2) continue; // skip ghost atoms not in AIR
        slot = airOf[j] - 1;
      }
      ++counts[slot];
      out[kept++] = raw;
    }

    ilist[nrows++] = i;
    firstneigh[i] = out;
    numneigh[i] = kept;
    pages->vgot(kept);
    if (pages->status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");

    // sort the locals+ghosts in the neighbor list by their ssaAIR number

    ssaAIRptr = atom->ssaAIR;
    qsort(&(out[0]), kept, sizeof(int), cmp_ssaAIR);

    // do a prefix sum on the counts to turn them into indexes

    list->ndxAIR_ssa[i][0] = counts[0];
    for (int a = 1; a < 8; ++a) {
      list->ndxAIR_ssa[i][a] = counts[a] + list->ndxAIR_ssa[i][a - 1];
    }
  }

  list->inum = nrows;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
comparison function invoked by qsort()
|
||||
accesses static class member ssaAIRptr, set before call to qsort()
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static int cmp_ssaAIR(const void *iptr, const void *jptr)
|
||||
{
|
||||
int i = *((int *) iptr);
|
||||
int j = *((int *) jptr);
|
||||
if (ssaAIRptr[i] < ssaAIRptr[j]) return -1;
|
||||
if (ssaAIRptr[i] > ssaAIRptr[j]) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
44
src/USER-DPD/npair_halffull_newton_ssa.h
Normal file
44
src/USER-DPD/npair_halffull_newton_ssa.h
Normal file
@ -0,0 +1,44 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
NPairStyle(halffull/newton/ssa,
|
||||
NPairHalffullNewtonSSA,
|
||||
NP_HALFFULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON |
|
||||
NP_ORTHO | NP_TRI | NP_SSA)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALFFULL_NEWTON_SSA_H
|
||||
#define LMP_NPAIR_HALFFULL_NEWTON_SSA_H
|
||||
|
||||
#include "npair.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Neighbor-pair builder registered under the style name
// halffull/newton/ssa; its build() derives an SSA half list from an
// existing full list.
class NPairHalffullNewtonSSA : public NPair {
|
||||
public:
|
||||
NPairHalffullNewtonSSA(class LAMMPS *);
|
||||
~NPairHalffullNewtonSSA() {}
|
||||
// construct the neighbor list passed in from its parent full list
void build(class NeighList *);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
64
src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp
Normal file
64
src/USER-DPD/nstencil_half_bin_2d_newton_ssa.cpp
Normal file
@ -0,0 +1,64 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors:
|
||||
James Larentzos and Timothy I. Mattox (Engility Corporation)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "nstencil_half_bin_2d_newton_ssa.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Constructor: passes the LAMMPS instance pointer to the NStencilSSA base class and does nothing else.
NStencilHalfBin2dNewtonSSA::NStencilHalfBin2dNewtonSSA(LAMMPS *lmp) :
|
||||
NStencilSSA(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create stencil based on bin geometry and cutoff
|
||||
stencil = bins whose closest corner to central bin is within cutoff
|
||||
sx,sy,sz = bin bounds = furthest the stencil could possibly extend
|
||||
3d creates xyz stencil, 2d creates xy stencil
|
||||
for half list with newton on:
|
||||
stencil is bins to the "upper right" of central bin
|
||||
stencil does not include self
|
||||
additionally, includes the bins beyond nstencil that are needed
|
||||
to locate all the Active Interaction Region (AIR) ghosts for SSA
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NStencilHalfBin2dNewtonSSA::create()
|
||||
{
|
||||
int i,j,pos = 0;
|
||||
|
||||
for (j = 0; j <= sy; j++)
|
||||
for (i = -sx; i <= sx; i++)
|
||||
if (j > 0 || (j == 0 && i > 0))
|
||||
if (bin_distance(i,j,0) < cutneighmaxsq)
|
||||
stencil[pos++] = j*mbinx + i;
|
||||
|
||||
nstencil_half = pos; // record where normal half stencil ends
|
||||
|
||||
// include additional bins for AIR ghosts only
|
||||
|
||||
for (j = -sy; j <= 0; j++)
|
||||
for (i = -sx; i <= sx; i++) {
|
||||
if (j == 0 && i > 0) continue;
|
||||
if (bin_distance(i,j,0) < cutneighmaxsq)
|
||||
stencil[pos++] = j*mbinx + i;
|
||||
}
|
||||
|
||||
nstencil = pos; // record where full stencil ends
|
||||
}
|
||||
43
src/USER-DPD/nstencil_half_bin_2d_newton_ssa.h
Normal file
43
src/USER-DPD/nstencil_half_bin_2d_newton_ssa.h
Normal file
@ -0,0 +1,43 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NSTENCIL_CLASS
|
||||
|
||||
NStencilStyle(half/bin/2d/newton/ssa,
|
||||
NStencilHalfBin2dNewtonSSA,
|
||||
NS_HALF | NS_BIN | NS_2D | NS_NEWTON | NS_SSA | NS_ORTHO)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NSTENCIL_HALF_BIN_2D_NEWTON_SSA_H
|
||||
#define LMP_NSTENCIL_HALF_BIN_2D_NEWTON_SSA_H
|
||||
|
||||
#include "nstencil_ssa.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// 2d stencil for the SSA neighbor build, registered under the style name
// half/bin/2d/newton/ssa.
class NStencilHalfBin2dNewtonSSA : public NStencilSSA {
|
||||
public:
|
||||
NStencilHalfBin2dNewtonSSA(class LAMMPS *);
|
||||
~NStencilHalfBin2dNewtonSSA() {}
|
||||
// fill the stencil array and set nstencil_half / nstencil
void create();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
74
src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp
Normal file
74
src/USER-DPD/nstencil_half_bin_3d_newton_ssa.cpp
Normal file
@ -0,0 +1,74 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors:
|
||||
James Larentzos and Timothy I. Mattox (Engility Corporation)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "nstencil_half_bin_3d_newton_ssa.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Constructor: passes the LAMMPS instance pointer to the NStencilSSA base class and does nothing else.
NStencilHalfBin3dNewtonSSA::NStencilHalfBin3dNewtonSSA(LAMMPS *lmp) :
|
||||
NStencilSSA(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create stencil based on bin geometry and cutoff
|
||||
stencil = bins whose closest corner to central bin is within cutoff
|
||||
sx,sy,sz = bin bounds = furthest the stencil could possibly extend
|
||||
3d creates xyz stencil, 2d creates xy stencil
|
||||
for half list with newton on:
|
||||
stencil is bins to the "upper right" of central bin
|
||||
stencil does not include self
|
||||
additionally, includes the bins beyond nstencil that are needed
|
||||
to locate all the Active Interaction Region (AIR) ghosts for SSA
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NStencilHalfBin3dNewtonSSA::create()
|
||||
{
|
||||
int i,j,k,pos = 0;
|
||||
|
||||
for (k = 0; k <= sz; k++)
|
||||
for (j = -sy; j <= sy; j++)
|
||||
for (i = -sx; i <= sx; i++)
|
||||
if (k > 0 || j > 0 || (j == 0 && i > 0))
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq)
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
|
||||
nstencil_half = pos; // record where normal half stencil ends
|
||||
|
||||
// include additional bins for AIR ghosts only
|
||||
|
||||
for (k = -sz; k < 0; k++)
|
||||
for (j = -sy; j <= sy; j++)
|
||||
for (i = -sx; i <= sx; i++)
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq)
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
|
||||
// For k==0, make sure to skip already included bins
|
||||
|
||||
k = 0;
|
||||
for (j = -sy; j <= 0; j++)
|
||||
for (i = -sx; i <= sx; i++) {
|
||||
if (j == 0 && i > 0) continue;
|
||||
if (bin_distance(i,j,k) < cutneighmaxsq)
|
||||
stencil[pos++] = k*mbiny*mbinx + j*mbinx + i;
|
||||
}
|
||||
|
||||
nstencil = pos; // record where full stencil ends
|
||||
}
|
||||
43
src/USER-DPD/nstencil_half_bin_3d_newton_ssa.h
Normal file
43
src/USER-DPD/nstencil_half_bin_3d_newton_ssa.h
Normal file
@ -0,0 +1,43 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NSTENCIL_CLASS
|
||||
|
||||
NStencilStyle(half/bin/3d/newton/ssa,
|
||||
NStencilHalfBin3dNewtonSSA,
|
||||
NS_HALF | NS_BIN | NS_3D | NS_NEWTON | NS_SSA | NS_ORTHO)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NSTENCIL_HALF_BIN_3D_NEWTON_SSA_H
|
||||
#define LMP_NSTENCIL_HALF_BIN_3D_NEWTON_SSA_H
|
||||
|
||||
#include "nstencil_ssa.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// 3d stencil for the SSA neighbor build, registered under the style name
// half/bin/3d/newton/ssa.
class NStencilHalfBin3dNewtonSSA : public NStencilSSA {
|
||||
public:
|
||||
NStencilHalfBin3dNewtonSSA(class LAMMPS *);
|
||||
~NStencilHalfBin3dNewtonSSA() {}
|
||||
// fill the stencil array and set nstencil_half / nstencil
void create();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
@ -11,12 +11,26 @@
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_NSTENCIL_SSA_H
|
||||
#define LMP_NSTENCIL_SSA_H
|
||||
|
||||
#include "nstencil.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NStencilSSA : public NStencil {
|
||||
public:
|
||||
NStencilSSA(class LAMMPS *lmp) : NStencil(lmp) { }
|
||||
~NStencilSSA() {}
|
||||
virtual void create() = 0;
|
||||
|
||||
int nstencil_half; // where the half stencil ends
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Neighbor list overflow, boost neigh_modify one
|
||||
|
||||
There are too many neighbors of a single atom. Use the neigh_modify
|
||||
command to increase the max number of neighbors allowed for one atom.
|
||||
You may also want to boost the page size.
|
||||
|
||||
*/
|
||||
@ -320,11 +320,9 @@ void PairDPDfdt::init_style()
|
||||
|
||||
splitFDT_flag = false;
|
||||
int irequest = neighbor->request(this,instance_me);
|
||||
neighbor->requests[irequest]->ssa = 0;
|
||||
for (int i = 0; i < modify->nfix; i++)
|
||||
if (strcmp(modify->fix[i]->style,"shardlow") == 0){
|
||||
splitFDT_flag = true;
|
||||
neighbor->requests[irequest]->ssa = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -408,11 +408,9 @@ void PairDPDfdtEnergy::init_style()
|
||||
|
||||
splitFDT_flag = false;
|
||||
int irequest = neighbor->request(this,instance_me);
|
||||
neighbor->requests[irequest]->ssa = 0;
|
||||
for (int i = 0; i < modify->nfix; i++)
|
||||
if (strcmp(modify->fix[i]->style,"shardlow") == 0){
|
||||
splitFDT_flag = true;
|
||||
neighbor->requests[irequest]->ssa = 1;
|
||||
}
|
||||
|
||||
bool eos_flag = false;
|
||||
|
||||
@ -3,10 +3,6 @@
|
||||
|
||||
mode=$1
|
||||
|
||||
# enforce using portable C locale
|
||||
LC_ALL=C
|
||||
export LC_ALL
|
||||
|
||||
# arg1 = file, arg2 = file it depends on
|
||||
|
||||
action () {
|
||||
@ -44,6 +40,10 @@ action intel_preprocess.h
|
||||
action intel_buffers.h
|
||||
action intel_buffers.cpp
|
||||
action math_extra_intel.h
|
||||
action nbin_intel.h
|
||||
action nbin_intel.cpp
|
||||
action npair_intel.h
|
||||
action npair_intel.cpp
|
||||
action intel_simd.h pair_sw_intel.cpp
|
||||
action intel_intrinsics.h pair_tersoff_intel.cpp
|
||||
action verlet_lrt_intel.h pppm.cpp
|
||||
@ -58,18 +58,10 @@ if (test $mode = 1) then
|
||||
sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_USER_INTEL |' ../Makefile.package
|
||||
fi
|
||||
|
||||
# force rebuild of files with LMP_USER_INTEL switch
|
||||
|
||||
touch ../accelerator_intel.h
|
||||
|
||||
elif (test $mode = 0) then
|
||||
|
||||
if (test -e ../Makefile.package) then
|
||||
sed -i -e 's/[^ \t]*INTEL[^ \t]* //' ../Makefile.package
|
||||
fi
|
||||
|
||||
# force rebuild of files with LMP_USER_INTEL switch
|
||||
|
||||
touch ../accelerator_intel.h
|
||||
|
||||
fi
|
||||
|
||||
@ -317,8 +317,6 @@ void FixIntel::init()
|
||||
error->all(FLERR,
|
||||
"Currently, cannot use more than one intel style with hybrid.");
|
||||
|
||||
neighbor->fix_intel = (void *)this;
|
||||
|
||||
check_neighbor_intel();
|
||||
if (_precision_mode == PREC_MODE_SINGLE)
|
||||
_single_buffers->zero_ev();
|
||||
|
||||
@ -26,18 +26,17 @@ IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
|
||||
_buf_size(0), _buf_local_size(0) {
|
||||
_list_alloc_atoms = 0;
|
||||
_ntypes = 0;
|
||||
_off_map_maxlocal = 0;
|
||||
_off_map_listlocal = 0;
|
||||
_ccachex = 0;
|
||||
_host_nmax = 0;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
_separate_buffers = 0;
|
||||
_off_f = 0;
|
||||
_off_map_ilist = 0;
|
||||
_off_map_nmax = 0;
|
||||
_off_map_maxhead = 0;
|
||||
_off_list_alloc = false;
|
||||
_off_threads = 0;
|
||||
_off_ccache = 0;
|
||||
_host_nmax = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -173,21 +172,15 @@ void IntelBuffers<flt_t, acc_t>::free_nmax()
|
||||
const int * tag = _off_map_tag;
|
||||
const int * special = _off_map_special;
|
||||
const int * nspecial = _off_map_nspecial;
|
||||
const int * bins = _off_map_bins;
|
||||
const int * binpacked = _binpacked;
|
||||
if (tag != 0 && special != 0 && nspecial !=0 && bins != 0) {
|
||||
if (tag != 0 && special != 0 && nspecial !=0) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(tag:alloc_if(0) free_if(1)) \
|
||||
nocopy(special,nspecial:alloc_if(0) free_if(1)) \
|
||||
nocopy(bins,binpacked:alloc_if(0) free_if(1))
|
||||
nocopy(special,nspecial:alloc_if(0) free_if(1))
|
||||
}
|
||||
_off_map_nmax = 0;
|
||||
}
|
||||
#endif
|
||||
if (_host_nmax > 0) {
|
||||
lmp->memory->destroy(_binpacked);
|
||||
_host_nmax = 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -195,12 +188,11 @@ void IntelBuffers<flt_t, acc_t>::free_nmax()
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int offload_end)
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
free_nmax();
|
||||
int size = lmp->atom->nmax;
|
||||
_host_nmax = size;
|
||||
lmp->memory->create(_binpacked, _host_nmax, "_binpacked");
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (!offload_end) return;
|
||||
int *special, *nspecial;
|
||||
int tag_length, special_length, nspecial_length;
|
||||
@ -220,10 +212,7 @@ void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int offload_end)
|
||||
else
|
||||
tag_length = 1;
|
||||
int *tag = lmp->atom->tag;
|
||||
int *bins = lmp->neighbor->bins;
|
||||
int * binpacked = _binpacked;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(bins,binpacked:length(size) alloc_if(1) free_if(0)) \
|
||||
nocopy(tag:length(tag_length) alloc_if(1) free_if(0)) \
|
||||
nocopy(special:length(special_length) alloc_if(1) free_if(0)) \
|
||||
nocopy(nspecial:length(nspecial_length) alloc_if(1) free_if(0))
|
||||
@ -231,18 +220,16 @@ void IntelBuffers<flt_t, acc_t>::_grow_nmax(const int offload_end)
|
||||
_off_map_special = special;
|
||||
_off_map_nspecial = nspecial;
|
||||
_off_map_nmax = size;
|
||||
_off_map_bins = bins;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::free_local()
|
||||
void IntelBuffers<flt_t, acc_t>::free_list_local()
|
||||
{
|
||||
if (_off_map_maxlocal > 0) {
|
||||
if (_off_map_listlocal > 0) {
|
||||
int * cnumneigh = _cnumneigh;
|
||||
int * atombin = _atombin;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_off_map_ilist != NULL) {
|
||||
const int * ilist = _off_map_ilist;
|
||||
@ -250,40 +237,36 @@ void IntelBuffers<flt_t, acc_t>::free_local()
|
||||
_off_map_ilist = NULL;
|
||||
if (numneigh != 0 && ilist != 0) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(ilist,numneigh,cnumneigh,atombin:alloc_if(0) free_if(1))
|
||||
nocopy(ilist,numneigh,cnumneigh:alloc_if(0) free_if(1))
|
||||
}
|
||||
}
|
||||
#endif
|
||||
lmp->memory->destroy(cnumneigh);
|
||||
lmp->memory->destroy(atombin);
|
||||
_off_map_maxlocal = 0;
|
||||
_off_map_listlocal = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::_grow_local(NeighList *list,
|
||||
void IntelBuffers<flt_t, acc_t>::_grow_list_local(NeighList *list,
|
||||
const int offload_end)
|
||||
{
|
||||
free_local();
|
||||
free_list_local();
|
||||
int size = list->get_maxlocal();
|
||||
lmp->memory->create(_cnumneigh, size, "_cnumneigh");
|
||||
lmp->memory->create(_atombin, size, "_atombin");
|
||||
_off_map_maxlocal = size;
|
||||
_off_map_listlocal = size;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_end > 0) {
|
||||
int * numneigh = list->numneigh;
|
||||
int * ilist = list->ilist;
|
||||
int * cnumneigh = _cnumneigh;
|
||||
int * atombin = _atombin;
|
||||
if (cnumneigh != 0 && atombin != 0) {
|
||||
if (cnumneigh != 0) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(ilist:length(size) alloc_if(1) free_if(0)) \
|
||||
nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \
|
||||
nocopy(cnumneigh:length(size) alloc_if(1) free_if(0)) \
|
||||
nocopy(atombin:length(size) alloc_if(1) free_if(0))
|
||||
nocopy(cnumneigh:length(size) alloc_if(1) free_if(0))
|
||||
}
|
||||
_off_map_ilist = ilist;
|
||||
_off_map_numneigh = numneigh;
|
||||
@ -293,39 +276,6 @@ void IntelBuffers<flt_t, acc_t>::_grow_local(NeighList *list,
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::free_binhead()
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_off_map_maxhead > 0) {
|
||||
const int * binhead = _off_map_binhead;
|
||||
if (binhead !=0) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(binhead:alloc_if(0) free_if(1))
|
||||
}
|
||||
_off_map_maxhead = 0;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::_grow_binhead()
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
free_binhead();
|
||||
int * binhead = lmp->neighbor->binhead;
|
||||
const int maxhead = lmp->neighbor->maxhead;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(binhead:length(maxhead+1) alloc_if(1) free_if(0))
|
||||
_off_map_binhead = binhead;
|
||||
_off_map_maxhead = maxhead;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::free_nbor_list()
|
||||
{
|
||||
@ -333,11 +283,8 @@ void IntelBuffers<flt_t, acc_t>::free_nbor_list()
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_off_list_alloc) {
|
||||
int * list_alloc = _list_alloc;
|
||||
int * stencil = _off_map_stencil;
|
||||
if (list_alloc != 0 && stencil != 0) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(list_alloc:alloc_if(0) free_if(1))
|
||||
}
|
||||
_off_list_alloc = false;
|
||||
}
|
||||
#endif
|
||||
@ -364,33 +311,16 @@ void IntelBuffers<flt_t, acc_t>::_grow_nbor_list(NeighList *list,
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_end > 0) {
|
||||
int * list_alloc =_list_alloc;
|
||||
int * stencil = list->stencil;
|
||||
|
||||
if (list_alloc != NULL) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(stencil:length(list->maxstencil) alloc_if(1) free_if(0)) \
|
||||
nocopy(list_alloc:length(list_alloc_size) alloc_if(1) free_if(0))
|
||||
_off_map_stencil = stencil;
|
||||
_off_list_alloc = true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::_grow_stencil(NeighList *list)
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int * stencil = _off_map_stencil;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(stencil:alloc_if(0) free_if(1))
|
||||
stencil = list->stencil;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(stencil:length(list->maxstencil) alloc_if(1) free_if(0))
|
||||
_off_map_stencil = stencil;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
@ -544,7 +474,6 @@ double IntelBuffers<flt_t, acc_t>::memory_usage(const int nthreads)
|
||||
if (_off_f) tmem += fstride*_off_threads * sizeof(vec3_acc_t);
|
||||
#endif
|
||||
|
||||
tmem += _off_map_maxlocal * sizeof(int) * 2;
|
||||
tmem += (_list_alloc_atoms + _off_threads) * get_max_nbors() * sizeof(int);
|
||||
tmem += _ntypes * _ntypes * sizeof(int);
|
||||
|
||||
|
||||
@ -61,50 +61,35 @@ class IntelBuffers {
|
||||
}
|
||||
|
||||
void free_buffers();
|
||||
|
||||
void free_nmax();
|
||||
inline void set_bininfo(int *atombin, int *binpacked)
|
||||
{ _atombin = atombin; _binpacked = binpacked; }
|
||||
inline void grow(const int nall, const int nlocal, const int nthreads,
|
||||
const int offload_end) {
|
||||
if (nall >= _buf_size || nlocal >= _buf_local_size)
|
||||
_grow(nall, nlocal, nthreads, offload_end);
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (lmp->atom->nmax > _host_nmax)
|
||||
_grow_nmax(offload_end);
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void free_all_nbor_buffers() {
|
||||
free_nbor_list();
|
||||
free_nmax();
|
||||
free_binhead();
|
||||
free_local();
|
||||
free_list_local();
|
||||
}
|
||||
|
||||
inline void grow_nbor(NeighList *list, const int nlocal, const int nthreads,
|
||||
inline void grow_list(NeighList *list, const int nlocal, const int nthreads,
|
||||
const int offload_end, const int pack_width=1) {
|
||||
grow_local(list, offload_end);
|
||||
grow_nmax(offload_end);
|
||||
if (offload_end)
|
||||
grow_binhead();
|
||||
grow_list_local(list, offload_end);
|
||||
grow_nbor_list(list, nlocal, nthreads, offload_end, pack_width);
|
||||
}
|
||||
|
||||
void free_nmax();
|
||||
|
||||
inline void grow_nmax(const int offload_end) {
|
||||
if (lmp->atom->nmax > _host_nmax)
|
||||
_grow_nmax(offload_end);
|
||||
}
|
||||
|
||||
void free_local();
|
||||
|
||||
inline void grow_local(NeighList *list, const int offload_end) {
|
||||
if (list->get_maxlocal() > _off_map_maxlocal)
|
||||
_grow_local(list, offload_end);
|
||||
}
|
||||
|
||||
void free_binhead();
|
||||
|
||||
inline void grow_binhead() {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (lmp->neighbor->maxhead > _off_map_maxhead)
|
||||
_grow_binhead();
|
||||
#endif
|
||||
void free_list_local();
|
||||
inline void grow_list_local(NeighList *list, const int offload_end) {
|
||||
if (list->get_maxlocal() > _off_map_listlocal)
|
||||
_grow_list_local(list, offload_end);
|
||||
}
|
||||
|
||||
void free_ccache();
|
||||
@ -134,19 +119,15 @@ class IntelBuffers {
|
||||
const int pack_width) {
|
||||
if (nlocal > _list_alloc_atoms)
|
||||
_grow_nbor_list(list, nlocal, nthreads, offload_end, pack_width);
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (offload_end > 0 && _off_map_stencil != list->stencil)
|
||||
_grow_stencil(list);
|
||||
#endif
|
||||
}
|
||||
|
||||
void set_ntypes(const int ntypes);
|
||||
|
||||
inline int * firstneigh(const NeighList *list) { return _list_alloc; }
|
||||
inline int * cnumneigh(const NeighList *list) { return _cnumneigh; }
|
||||
|
||||
inline int * get_atombin() { return _atombin; }
|
||||
inline int * get_binpacked() { return _binpacked; }
|
||||
|
||||
inline atom_t * get_x(const int offload = 1) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_separate_buffers && offload == 0) return _host_x;
|
||||
@ -271,13 +252,10 @@ class IntelBuffers {
|
||||
flt_t *_q;
|
||||
quat_t *_quat;
|
||||
vec3_acc_t * _f;
|
||||
int _off_threads, _off_map_maxlocal;
|
||||
int _off_threads, _off_map_listlocal;
|
||||
|
||||
int _list_alloc_atoms;
|
||||
int * _list_alloc;
|
||||
int * _cnumneigh;
|
||||
int * _atombin;
|
||||
int * _binpacked;
|
||||
int *_list_alloc, *_cnumneigh, *_atombin, *_binpacked;
|
||||
|
||||
flt_t **_cutneighsq;
|
||||
int _ntypes;
|
||||
@ -296,26 +274,24 @@ class IntelBuffers {
|
||||
flt_t *_host_q;
|
||||
quat_t *_host_quat;
|
||||
vec3_acc_t *_off_f;
|
||||
int _off_map_nmax, _off_map_maxhead, _cop, _off_ccache;
|
||||
int _off_map_nmax, _cop, _off_ccache;
|
||||
int *_off_map_ilist;
|
||||
int *_off_map_stencil, *_off_map_special, *_off_map_nspecial, *_off_map_tag;
|
||||
int *_off_map_binhead, *_off_map_bins, *_off_map_numneigh;
|
||||
int *_off_map_special, *_off_map_nspecial, *_off_map_tag;
|
||||
int *_off_map_numneigh;
|
||||
bool _off_list_alloc;
|
||||
int _need_tag;
|
||||
int _need_tag, _host_nmax;
|
||||
#endif
|
||||
|
||||
int _buf_size, _buf_local_size, _host_nmax;
|
||||
int _buf_size, _buf_local_size;
|
||||
_alignvar(acc_t _ev_global[8],64);
|
||||
_alignvar(acc_t _ev_global_host[8],64);
|
||||
|
||||
void _grow(const int nall, const int nlocal, const int nthreads,
|
||||
const int offload_end);
|
||||
void _grow_nmax(const int offload_end);
|
||||
void _grow_local(NeighList *list, const int offload_end);
|
||||
void _grow_binhead();
|
||||
void _grow_list_local(NeighList *list, const int offload_end);
|
||||
void _grow_nbor_list(NeighList *list, const int nlocal, const int nthreads,
|
||||
const int offload_end, const int pack_width);
|
||||
void _grow_stencil(NeighList *list);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
253
src/USER-INTEL/nbin_intel.cpp
Normal file
253
src/USER-INTEL/nbin_intel.cpp
Normal file
@ -0,0 +1,253 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "nbin_intel.h"
|
||||
#include "atom.h"
|
||||
#include "group.h"
|
||||
#include "domain.h"
|
||||
#include "comm.h"
|
||||
#include "update.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Locate the fix with ID "package_intel" and cache the settings this
// binning class reads from it.  Aborts through error->all() when the fix
// does not exist.
NBinIntel::NBinIntel(LAMMPS *lmp) : NBinStandard(lmp) {
  // per-atom host arrays are only created in bin_atoms_setup()
  _atombin = NULL;
  _binpacked = NULL;

  const int fix_index = modify->find_fix("package_intel");
  if (fix_index < 0) {
    error->all(FLERR,
               "The 'package intel' command is required for /intel styles");
  }

  _fix = static_cast<FixIntel *>(modify->fix[fix_index]);
  _precision_mode = _fix->precision();

#ifdef _LMP_INTEL_OFFLOAD
  // nothing has been allocated on the coprocessor yet
  _offload_alloc = 0;
  _cop = _fix->coprocessor_number();
#endif
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Release the coprocessor copies of the binning arrays (offload builds
// only) and the two host arrays that are owned by this class.
NBinIntel::~NBinIntel() {
#ifdef _LMP_INTEL_OFFLOAD
  if (_offload_alloc) {
    // local copies are needed because the offload clauses take plain
    // variable names; free_if(1) drops the device-side allocations
    const int * binhead = this->binhead;
    const int * bins = this->bins;
    const int * _atombin = this->_atombin;
    const int * _binpacked = this->_binpacked;
    #pragma offload_transfer target(mic:_cop) \
      nocopy(binhead,bins,_atombin,_binpacked:alloc_if(0) free_if(1))
  }
#endif

  // _atombin and _binpacked are created with memory->create() in
  // bin_atoms_setup() and were previously never destroyed; they start out
  // as NULL in the constructor, so this is also safe when setup never ran.
  // Done after the offload release above, which still reads the host
  // pointers.
  // NOTE(review): the IntelBuffers object that received these pointers via
  // set_bininfo() keeps the old addresses -- confirm nothing dereferences
  // them after this object is gone.
  memory->destroy(_atombin);
  memory->destroy(_binpacked);
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup for bin_atoms()
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NBinIntel::bin_atoms_setup(int nall)
|
||||
{
|
||||
// binhead = per-bin vector, mbins in length
|
||||
// add 1 bin for USER-INTEL package
|
||||
|
||||
if (mbins > maxbin) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_offload_alloc) {
|
||||
const int * binhead = this->binhead;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(binhead:alloc_if(0) free_if(1))
|
||||
}
|
||||
#endif
|
||||
|
||||
maxbin = mbins;
|
||||
memory->destroy(binhead);
|
||||
memory->create(binhead,maxbin+1,"neigh:binhead");
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_fix->offload_balance() != 0) {
|
||||
int * binhead = this->binhead;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(binhead:length(maxbin+1) alloc_if(1) free_if(0))
|
||||
}
|
||||
#endif
|
||||
last_bin_memory = update->ntimestep;
|
||||
}
|
||||
|
||||
// bins = per-atom vector
|
||||
|
||||
if (nall > maxatom) {
|
||||
maxatom = nall;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_offload_alloc) {
|
||||
const int * bins = this->bins;
|
||||
const int * _atombin = this->_atombin;
|
||||
const int * _binpacked = this->_binpacked;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(bins,_atombin,_binpacked:alloc_if(0) free_if(1))
|
||||
}
|
||||
#endif
|
||||
memory->destroy(bins);
|
||||
memory->destroy(_atombin);
|
||||
memory->destroy(_binpacked);
|
||||
|
||||
memory->create(bins,maxatom,"neigh:bins");
|
||||
memory->create(_atombin,maxatom,"neigh:bins");
|
||||
memory->create(_binpacked,maxatom,"neigh:bins");
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_fix->offload_balance() != 0) {
|
||||
const int * bins = this->bins;
|
||||
const int * _atombin = this->_atombin;
|
||||
const int * _binpacked = this->_binpacked;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(bins,_atombin,_binpacked:length(maxatom) alloc_if(1) free_if(0))
|
||||
_offload_alloc=1;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (_precision_mode == FixIntel::PREC_MODE_MIXED)
|
||||
_fix->get_mixed_buffers()->set_bininfo(_atombin,_binpacked);
|
||||
else if (_precision_mode == FixIntel::PREC_MODE_SINGLE)
|
||||
_fix->get_single_buffers()->set_bininfo(_atombin,_binpacked);
|
||||
else
|
||||
_fix->get_double_buffers()->set_bininfo(_atombin,_binpacked);
|
||||
|
||||
last_bin_memory = update->ntimestep;
|
||||
}
|
||||
|
||||
last_bin = update->ntimestep;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
bin owned and ghost atoms
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NBinIntel::bin_atoms()
|
||||
{
|
||||
if (_precision_mode == FixIntel::PREC_MODE_MIXED)
|
||||
bin_atoms(_fix->get_mixed_buffers());
|
||||
else if (_precision_mode == FixIntel::PREC_MODE_SINGLE)
|
||||
bin_atoms(_fix->get_single_buffers());
|
||||
else
|
||||
bin_atoms(_fix->get_double_buffers());
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NBinIntel::bin_atoms(IntelBuffers<flt_t,acc_t> * buffers) {
|
||||
const int nlocal = atom->nlocal;
|
||||
const int nall = nlocal + atom->nghost;
|
||||
const int aend = _fix->offload_end_neighbor();
|
||||
|
||||
|
||||
// ---------- Sanity check for padding --------------
|
||||
{
|
||||
const flt_t dx = (INTEL_BIGP - bboxhi[0]);
|
||||
const flt_t dy = (INTEL_BIGP - bboxhi[1]);
|
||||
const flt_t dz = (INTEL_BIGP - bboxhi[2]);
|
||||
if (dx * dx + dy * dy + dz * dz <
|
||||
static_cast<flt_t>(neighbor->cutneighmaxsq))
|
||||
error->one(FLERR,
|
||||
"Intel package expects no atoms within cutoff of {1e15,1e15,1e15}.");
|
||||
}
|
||||
|
||||
// ---------- Grow and cast/pack buffers -------------
|
||||
_fix->start_watch(TIME_PACK);
|
||||
buffers->grow(nall, atom->nlocal, comm->nthreads, aend);
|
||||
|
||||
ATOM_T biga;
|
||||
biga.x = INTEL_BIGP;
|
||||
biga.y = INTEL_BIGP;
|
||||
biga.z = INTEL_BIGP;
|
||||
biga.w = 1;
|
||||
buffers->get_x()[nall] = biga;
|
||||
|
||||
const int nthreads = comm->nthreads;
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(buffers)
|
||||
#endif
|
||||
{
|
||||
int ifrom, ito, tid;
|
||||
IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads,
|
||||
sizeof(ATOM_T));
|
||||
buffers->thr_pack(ifrom, ito, 0);
|
||||
}
|
||||
_fix->stop_watch(TIME_PACK);
|
||||
|
||||
|
||||
// ---------- Bin Atoms -------------
|
||||
_fix->start_watch(TIME_HOST_NEIGHBOR);
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const atombin = this->_atombin;
|
||||
int * _noalias const binpacked = this->_binpacked;
|
||||
|
||||
|
||||
const double sboxlo0 = bboxlo[0] + mbinxlo/bininvx;
|
||||
const double sboxlo1 = bboxlo[1] + mbinylo/bininvy;
|
||||
const double sboxlo2 = bboxlo[2] + mbinzlo/bininvz;
|
||||
|
||||
int i, ibin;
|
||||
|
||||
for (i = 0; i < mbins; i++) binhead[i] = -1;
|
||||
|
||||
int *mask = atom->mask;
|
||||
|
||||
if (includegroup) {
|
||||
int bitmask = group->bitmask[includegroup];
|
||||
for (i = nall-1; i >= nlocal; i--) {
|
||||
if (mask[i] & bitmask) {
|
||||
ibin = coord2bin(atom->x[i]);
|
||||
bins[i] = binhead[ibin];
|
||||
binhead[ibin] = i;
|
||||
}
|
||||
}
|
||||
for (i = atom->nfirst-1; i >= 0; i--) {
|
||||
ibin = coord2bin(atom->x[i]);
|
||||
atombin[i] = ibin;
|
||||
bins[i] = binhead[ibin];
|
||||
binhead[ibin] = i;
|
||||
}
|
||||
} else {
|
||||
for (i = nall-1; i >= nlocal; i--) {
|
||||
ibin = coord2bin(atom->x[i]);
|
||||
bins[i] = binhead[ibin];
|
||||
binhead[ibin] = i;
|
||||
}
|
||||
for (i = nlocal-1; i >= 0; i--) {
|
||||
ibin = coord2bin(atom->x[i]);
|
||||
atombin[i]=ibin;
|
||||
bins[i] = binhead[ibin];
|
||||
binhead[ibin] = i;
|
||||
}
|
||||
}
|
||||
int newhead = 0;
|
||||
for (i = 0; i < mbins; i++) {
|
||||
int j = binhead[i];
|
||||
binhead[i] = newhead;
|
||||
for ( ; j >= 0; j = bins[j])
|
||||
binpacked[newhead++] = j;
|
||||
}
|
||||
binhead[mbins] = newhead;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Memory reported by NBinStandard plus two additional per-atom int arrays
// of length maxatom (presumably _atombin and _binpacked, which are the
// arrays this class creates besides bins).
bigint NBinIntel::memory_usage()
{
  // widen before multiplying so that maxatom*2 cannot overflow an int
  const bigint extra = static_cast<bigint>(maxatom) * 2 *
    static_cast<bigint>(sizeof(int));
  return NBinStandard::memory_usage() + extra;
}
|
||||
69
src/USER-INTEL/nbin_intel.h
Normal file
69
src/USER-INTEL/nbin_intel.h
Normal file
@ -0,0 +1,69 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NBIN_CLASS
|
||||
|
||||
NBinStyle(intel,
|
||||
NBinIntel,
|
||||
NB_INTEL)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NBIN_INTEL_H
|
||||
#define LMP_NBIN_INTEL_H
|
||||
|
||||
#include "nbin_standard.h"
|
||||
#include "fix_intel.h"
|
||||
#include "memory.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NBinIntel : public NBinStandard {
|
||||
public:
|
||||
NBinIntel(class LAMMPS *);
|
||||
~NBinIntel();
|
||||
void bin_atoms_setup(int);
|
||||
void bin_atoms();
|
||||
int * get_binpacked() { return _binpacked; }
|
||||
|
||||
private:
|
||||
FixIntel *_fix;
|
||||
int *_atombin, *_binpacked;
|
||||
int _precision_mode;
|
||||
bigint memory_usage();
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void bin_atoms(IntelBuffers<flt_t,acc_t> *);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int _cop, _offload_alloc;
|
||||
#endif
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: The 'package intel' command is required for /intel styles
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Intel package expects no atoms within cutoff of {1e15,1e15,1e15}.
|
||||
|
||||
The Intel package can make use of dummy atoms for padding with a large position
|
||||
that should not be within the cutoff.
|
||||
|
||||
*/
|
||||
File diff suppressed because it is too large
Load Diff
552
src/USER-INTEL/npair_full_bin_intel.cpp
Normal file
552
src/USER-INTEL/npair_full_bin_intel.cpp
Normal file
@ -0,0 +1,552 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_full_bin_intel.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "group.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// all initialization is done by the NPairIntel base class
NPairFullBinIntel::NPairFullBinIntel(LAMMPS *lmp)
  : NPairIntel(lmp)
{
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction for all neighbors
|
||||
every neighbor pair appears in list of both atoms i and j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// Entry point for the full binned neighbor list build.  Rejects
// unsupported settings, runs the templated build for the precision mode of
// the Intel fix and then stops the TIME_HOST_NEIGHBOR watch.
void NPairFullBinIntel::build(NeighList *list)
{
  if (nstencil > INTEL_MAX_STENCIL_CHECK) {
    error->all(FLERR, "Too many neighbor bins for USER-INTEL package.");
  }

#ifdef _LMP_INTEL_OFFLOAD
  if (exclude) {
    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
  }
#endif

  // pick the buffer set that matches the precision mode of the fix
  const int mode = _fix->precision();
  switch (mode) {
    case FixIntel::PREC_MODE_MIXED:
      fbi(list, _fix->get_mixed_buffers());
      break;
    case FixIntel::PREC_MODE_DOUBLE:
      fbi(list, _fix->get_double_buffers());
      break;
    default:
      // every remaining mode uses the single precision buffers
      fbi(list, _fix->get_single_buffers());
      break;
  }

  _fix->stop_watch(TIME_HOST_NEIGHBOR);
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairFullBinIntel::
|
||||
fbi(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
list->inum = nlocal;
|
||||
list->gnum = 0;
|
||||
|
||||
int host_start = _fix->host_start_neighbor();;
|
||||
const int off_end = _fix->offload_end_neighbor();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (off_end) grow_stencil();
|
||||
if (_fix->full_host_list()) host_start = 0;
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end,
|
||||
_fix->nbor_pack_width());
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular)
|
||||
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
|
||||
neighbor->cutneighmax);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (need_ic) {
|
||||
if (offload_noghost) {
|
||||
fbi<flt_t,acc_t,1,1>(1, list, buffers, 0, off_end);
|
||||
fbi<flt_t,acc_t,1,1>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
fbi<flt_t,acc_t,0,1>(1, list, buffers, 0, off_end);
|
||||
fbi<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
} else {
|
||||
if (offload_noghost) {
|
||||
fbi<flt_t,acc_t,1,0>(1, list, buffers, 0, off_end);
|
||||
fbi<flt_t,acc_t,1,0>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
fbi<flt_t,acc_t,0,0>(1, list, buffers, 0, off_end);
|
||||
fbi<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (need_ic)
|
||||
fbi<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal);
|
||||
else
|
||||
fbi<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t, int offload_noghost, int need_ic>
|
||||
void NPairFullBinIntel::
|
||||
fbi(const int offload, NeighList *list, IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const int astart, const int aend, const int offload_end) {
|
||||
|
||||
if (aend-astart == 0) return;
|
||||
|
||||
const int nall = atom->nlocal + atom->nghost;
|
||||
int pad = 1;
|
||||
int nall_t = nall;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_noghost && offload) nall_t = atom->nlocal;
|
||||
#endif
|
||||
|
||||
const int pack_width = _fix->nbor_pack_width();
|
||||
const int pad_width = pad;
|
||||
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int e_nall = nall_t;
|
||||
|
||||
const int molecular = atom->molecular;
|
||||
int *ns = NULL;
|
||||
tagint *s = NULL;
|
||||
int tag_size = 0, special_size;
|
||||
if (buffers->need_tag()) tag_size = e_nall;
|
||||
if (molecular) {
|
||||
s = atom->special[0];
|
||||
ns = atom->nspecial[0];
|
||||
special_size = aend;
|
||||
} else {
|
||||
s = &buffers->_special_holder;
|
||||
ns = &buffers->_nspecial_holder;
|
||||
special_size = 0;
|
||||
}
|
||||
const tagint * _noalias const special = s;
|
||||
const int * _noalias const nspecial = ns;
|
||||
const int maxspecial = atom->maxspecial;
|
||||
const tagint * _noalias const tag = atom->tag;
|
||||
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int nstencil = this->nstencil;
|
||||
const int * _noalias const stencil = this->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
int * const mask = atom->mask;
|
||||
tagint * const molecule = atom->molecule;
|
||||
#endif
|
||||
|
||||
int tnum;
|
||||
int *overflow;
|
||||
double *timer_compute;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload) {
|
||||
timer_compute = _fix->off_watch_neighbor();
|
||||
tnum = buffers->get_off_threads();
|
||||
overflow = _fix->get_off_overflow_flag();
|
||||
_fix->stop_watch(TIME_HOST_NEIGHBOR);
|
||||
_fix->start_watch(TIME_OFFLOAD_LATENCY);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
tnum = comm->nthreads;
|
||||
overflow = _fix->get_overflow_flag();
|
||||
}
|
||||
const int nthreads = tnum;
|
||||
const int maxnbors = buffers->get_max_nbors();
|
||||
int * _noalias const atombin = buffers->get_atombin();
|
||||
const int * _noalias const binpacked = buffers->get_binpacked();
|
||||
|
||||
const int xperiodic = domain->xperiodic;
|
||||
const int yperiodic = domain->yperiodic;
|
||||
const int zperiodic = domain->zperiodic;
|
||||
const flt_t xprd_half = domain->xprd_half;
|
||||
const flt_t yprd_half = domain->yprd_half;
|
||||
const flt_t zprd_half = domain->zprd_half;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
const int * _noalias const binhead = this->binhead;
|
||||
const int * _noalias const bins = this->bins;
|
||||
const int cop = _fix->coprocessor_number();
|
||||
const int separate_buffers = _fix->separate_buffers();
|
||||
#pragma offload target(mic:cop) if(offload) \
|
||||
in(x:length(e_nall+1) alloc_if(0) free_if(0)) \
|
||||
in(tag:length(tag_size) alloc_if(0) free_if(0)) \
|
||||
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
|
||||
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
|
||||
in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \
|
||||
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
|
||||
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
out(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ilist:length(0) alloc_if(0) free_if(0)) \
|
||||
in(atombin:length(aend) alloc_if(0) free_if(0)) \
|
||||
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
|
||||
in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload,pack_width) \
|
||||
in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \
|
||||
in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \
|
||||
out(overflow:length(5) alloc_if(0) free_if(0)) \
|
||||
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
|
||||
signal(tag)
|
||||
#endif
|
||||
{
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime();
|
||||
#endif
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
overflow[LMP_LOCAL_MIN] = astart;
|
||||
overflow[LMP_LOCAL_MAX] = aend - 1;
|
||||
overflow[LMP_GHOST_MIN] = e_nall;
|
||||
overflow[LMP_GHOST_MAX] = -1;
|
||||
#endif
|
||||
|
||||
int nstencilp = 0;
|
||||
int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL];
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
binstart[nstencilp] = stencil[k];
|
||||
int end = stencil[k] + 1;
|
||||
for (int kk = k + 1; kk < nstencil; kk++) {
|
||||
if (stencil[kk-1]+1 == stencil[kk]) {
|
||||
end++;
|
||||
k++;
|
||||
} else break;
|
||||
}
|
||||
binend[nstencilp] = end;
|
||||
nstencilp++;
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) \
|
||||
shared(numneigh, overflow, nstencilp, binstart, binend)
|
||||
#endif
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1;
|
||||
#endif
|
||||
|
||||
const int num = aend - astart;
|
||||
int tid, ifrom, ito;
|
||||
|
||||
IP_PRE_omp_range_id_vec(ifrom, ito, tid, num, nthreads, pack_width);
|
||||
ifrom += astart;
|
||||
ito += astart;
|
||||
int e_ito = ito;
|
||||
if (ito == num) {
|
||||
int imod = ito % pack_width;
|
||||
if (imod) e_ito += pack_width - imod;
|
||||
}
|
||||
const int list_size = (e_ito + tid * 2 + 2) * maxnbors;
|
||||
int which;
|
||||
int pack_offset = maxnbors * pack_width;
|
||||
int ct = (ifrom + tid * 2) * maxnbors;
|
||||
int *neighptr = firstneigh + ct;
|
||||
const int obound = pack_offset + maxnbors * 2;
|
||||
|
||||
int max_chunk = 0;
|
||||
int lane = 0;
|
||||
for (int i = ifrom; i < ito; i++) {
|
||||
const flt_t xtmp = x[i].x;
|
||||
const flt_t ytmp = x[i].y;
|
||||
const flt_t ztmp = x[i].z;
|
||||
const int itype = x[i].w;
|
||||
const tagint itag = tag[i];
|
||||
const int ioffset = ntypes * itype;
|
||||
|
||||
const int ibin = atombin[i];
|
||||
int raw_count = pack_offset;
|
||||
|
||||
// loop over all atoms in surrounding bins in stencil including self
|
||||
// skip i = j
|
||||
if (exclude) {
|
||||
for (int k = 0; k < nstencilp; k++) {
|
||||
const int bstart = binhead[ibin + binstart[k]];
|
||||
const int bend = binhead[ibin + binend[k]];
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
#ifdef INTEL_VMASK
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for (int jj = bstart; jj < bend; jj++) {
|
||||
int j = binpacked[jj];
|
||||
|
||||
if (i == j) j=e_nall;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_noghost) {
|
||||
if (j < nlocal) {
|
||||
if (i < offload_end) continue;
|
||||
} else if (offload) continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
const int jtype = x[j].w;
|
||||
if (exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
#endif
|
||||
|
||||
neighptr[raw_count++] = j;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int k = 0; k < nstencilp; k++) {
|
||||
const int bstart = binhead[ibin + binstart[k]];
|
||||
const int bend = binhead[ibin + binend[k]];
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
#ifdef INTEL_VMASK
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for (int jj = bstart; jj < bend; jj++) {
|
||||
int j = binpacked[jj];
|
||||
|
||||
if (i == j) j=e_nall;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_noghost) {
|
||||
if (j < nlocal) {
|
||||
if (i < offload_end) continue;
|
||||
} else if (offload) continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
neighptr[raw_count++] = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (raw_count > obound) *overflow = 1;
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int vlmin = lmin, vlmax = lmax, vgmin = gmin, vgmax = gmax;
|
||||
#if __INTEL_COMPILER+0 > 1499
|
||||
#pragma vector aligned
|
||||
#pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin)
|
||||
#endif
|
||||
#else
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for (int u = pack_offset; u < raw_count; u++) {
|
||||
int j = neighptr[u];
|
||||
const flt_t delx = xtmp - x[j].x;
|
||||
const flt_t dely = ytmp - x[j].y;
|
||||
const flt_t delz = ztmp - x[j].z;
|
||||
const int jtype = x[j].w;
|
||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||
if (rsq > cutneighsq[ioffset + jtype])
|
||||
neighptr[u] = e_nall;
|
||||
else {
|
||||
if (need_ic) {
|
||||
int no_special;
|
||||
ominimum_image_check(no_special, delx, dely, delz);
|
||||
if (no_special)
|
||||
neighptr[u] = -j - 1;
|
||||
}
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j < nlocal) {
|
||||
if (j < vlmin) vlmin = j;
|
||||
if (j > vlmax) vlmax = j;
|
||||
} else {
|
||||
if (j < vgmin) vgmin = j;
|
||||
if (j > vgmax) vgmax = j;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
lmin = MIN(lmin,vlmin);
|
||||
gmin = MIN(gmin,vgmin);
|
||||
lmax = MAX(lmax,vlmax);
|
||||
gmax = MAX(gmax,vgmax);
|
||||
#endif
|
||||
|
||||
int n = lane, n2 = pack_offset;
|
||||
for (int u = pack_offset; u < raw_count; u++) {
|
||||
const int j = neighptr[u];
|
||||
int pj = j;
|
||||
if (pj < e_nall) {
|
||||
if (need_ic)
|
||||
if (pj < 0) pj = -pj - 1;
|
||||
|
||||
const int jtag = tag[pj];
|
||||
int flist = 0;
|
||||
if (itag > jtag) {
|
||||
if ((itag+jtag) % 2 == 0) flist = 1;
|
||||
} else if (itag < jtag) {
|
||||
if ((itag+jtag) % 2 == 1) flist = 1;
|
||||
} else {
|
||||
if (x[pj].z < ztmp) flist = 1;
|
||||
else if (x[pj].z == ztmp && x[pj].y < ytmp) flist = 1;
|
||||
else if (x[pj].z == ztmp && x[pj].y == ytmp && x[pj].x < xtmp)
|
||||
flist = 1;
|
||||
}
|
||||
if (flist) {
|
||||
neighptr[n2++] = j;
|
||||
} else {
|
||||
neighptr[n] = j;
|
||||
n += pack_width;
|
||||
}
|
||||
}
|
||||
}
|
||||
int ns = (n - lane) / pack_width;
|
||||
atombin[i] = ns;
|
||||
for (int u = pack_offset; u < n2; u++) {
|
||||
neighptr[n] = neighptr[u];
|
||||
n += pack_width;
|
||||
}
|
||||
|
||||
ilist[i] = i;
|
||||
cnumneigh[i] = ct + lane;
|
||||
ns += n2 - pack_offset;
|
||||
numneigh[i] = ns;
|
||||
|
||||
if (ns > max_chunk) max_chunk = ns;
|
||||
lane++;
|
||||
if (lane == pack_width) {
|
||||
ct += max_chunk * pack_width;
|
||||
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
|
||||
const int edge = (ct % alignb);
|
||||
if (edge) ct += alignb - edge;
|
||||
neighptr = firstneigh + ct;
|
||||
max_chunk = 0;
|
||||
pack_offset = maxnbors * pack_width;
|
||||
lane = 0;
|
||||
if (ct + obound > list_size) {
|
||||
if (i < ito - 1) {
|
||||
*overflow = 1;
|
||||
ct = (ifrom + tid * 2) * maxnbors;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (*overflow == 1)
|
||||
for (int i = ifrom; i < ito; i++)
|
||||
numneigh[i] = 0;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (separate_buffers) {
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp critical
|
||||
#endif
|
||||
{
|
||||
if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin;
|
||||
if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax;
|
||||
if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin;
|
||||
if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax;
|
||||
}
|
||||
#pragma omp barrier
|
||||
}
|
||||
|
||||
int ghost_offset = 0, nall_offset = e_nall;
|
||||
if (separate_buffers) {
|
||||
int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
|
||||
if (nghost < 0) nghost = 0;
|
||||
if (offload) {
|
||||
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
|
||||
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
|
||||
} else {
|
||||
ghost_offset = overflow[LMP_GHOST_MIN] - nlocal;
|
||||
nall_offset = nlocal + nghost;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (molecular) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
|
||||
const int trip = jnum * pack_width;
|
||||
for (int jj = 0; jj < trip; jj+=pack_width) {
|
||||
const int j = jlist[jj];
|
||||
if (need_ic && j < 0) {
|
||||
which = 0;
|
||||
jlist[jj] = -j - 1;
|
||||
} else
|
||||
ofind_special(which, special, nspecial, i, tag[j]);
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j >= nlocal) {
|
||||
if (j == e_nall)
|
||||
jlist[jj] = nall_offset;
|
||||
else if (which)
|
||||
jlist[jj] = (j-ghost_offset) ^ (which << SBBITS);
|
||||
else jlist[jj]-=ghost_offset;
|
||||
} else
|
||||
#endif
|
||||
if (which) jlist[jj] = j ^ (which << SBBITS);
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
int jj = 0;
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
if (jlist[jj] >= nlocal) {
|
||||
if (jlist[jj] == e_nall) jlist[jj] = nall_offset;
|
||||
else jlist[jj] -= ghost_offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} // end omp
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime() - *timer_compute;
|
||||
#endif
|
||||
} // end offload
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload) {
|
||||
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
|
||||
_fix->start_watch(TIME_HOST_NEIGHBOR);
|
||||
for (int n = 0; n < aend; n++) {
|
||||
ilist[n] = n;
|
||||
numneigh[n] = 0;
|
||||
}
|
||||
} else {
|
||||
for (int i = astart; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
if (separate_buffers) {
|
||||
_fix->start_watch(TIME_PACK);
|
||||
_fix->set_neighbor_host_sizes();
|
||||
buffers->pack_sep_from_single(_fix->host_min_local(),
|
||||
_fix->host_used_local(),
|
||||
_fix->host_min_ghost(),
|
||||
_fix->host_used_ghost());
|
||||
_fix->stop_watch(TIME_PACK);
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (int i = astart; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
#endif
|
||||
}
|
||||
51
src/USER-INTEL/npair_full_bin_intel.h
Normal file
51
src/USER-INTEL/npair_full_bin_intel.h
Normal file
@ -0,0 +1,51 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
NPairStyle(full/bin/intel,
|
||||
NPairFullBinIntel,
|
||||
NP_FULL | NP_BIN | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI |
|
||||
NP_INTEL)
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_FULL_BIN_INTEL_H
|
||||
#define LMP_NPAIR_FULL_BIN_INTEL_H
|
||||
|
||||
#include "npair_intel.h"
|
||||
#include "fix_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Neighbor-pair builder derived from NPairIntel that constructs full,
// binned neighbor lists (registered above as style full/bin/intel).
class NPairFullBinIntel : public NPairIntel {
|
||||
public:
|
||||
NPairFullBinIntel(class LAMMPS *);
|
||||
~NPairFullBinIntel() {}
|
||||
// entry point: build the neighbor list passed in
void build(class NeighList *);
|
||||
|
||||
private:
|
||||
// precision-templated driver operating on the matching IntelBuffers
template <class flt_t, class acc_t>
|
||||
void fbi(NeighList *, IntelBuffers<flt_t,acc_t> *);
|
||||
// worker overload; the two int template parameters are the
// offload_noghost and need_ic switches of the definition, the int
// arguments are offload, astart, aend and the optional offload_end
template <class flt_t, class acc_t, int, int>
|
||||
void fbi(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, const int,
|
||||
const int, const int offload_end = 0);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
451
src/USER-INTEL/npair_half_bin_newtoff_intel.cpp
Normal file
451
src/USER-INTEL/npair_half_bin_newtoff_intel.cpp
Normal file
@ -0,0 +1,451 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_half_bin_newtoff_intel.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "group.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Constructor: forwards the LAMMPS instance pointer to the NPairIntel
// base class; the body itself is empty.
NPairHalfBinNewtoffIntel::NPairHalfBinNewtoffIntel(LAMMPS *lmp) :
|
||||
NPairIntel(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with partial Newton's 3rd law
|
||||
each owned atom i checks own bin and other bins in stencil
|
||||
pair stored once if i,j are both owned and i < j
|
||||
pair stored by me if j is ghost (also stored by proc owning j)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairHalfBinNewtoffIntel::build(NeighList *list)
{
  // the stencil has to fit the fixed-size bin range arrays of the worker
  if (nstencil > INTEL_MAX_STENCIL_CHECK)
    error->all(FLERR, "Too many neighbor bins for USER-INTEL package.");

  #ifdef _LMP_INTEL_OFFLOAD
  if (exclude)
    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
  #endif

  // pick the buffer set matching the precision mode of the fix;
  // anything that is neither mixed nor double uses the single buffers
  if (_fix->precision() == FixIntel::PREC_MODE_MIXED) {
    hbnni(list, _fix->get_mixed_buffers());
  } else if (_fix->precision() != FixIntel::PREC_MODE_DOUBLE) {
    hbnni(list, _fix->get_single_buffers());
  } else {
    hbnni(list, _fix->get_double_buffers());
  }

  _fix->stop_watch(TIME_HOST_NEIGHBOR);
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairHalfBinNewtoffIntel::
|
||||
hbnni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
list->inum = nlocal;
|
||||
|
||||
const int off_end = _fix->offload_end_neighbor();
|
||||
int host_start = off_end;;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (off_end) grow_stencil();
|
||||
if (_fix->full_host_list()) host_start = 0;
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end);
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular)
|
||||
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
|
||||
neighbor->cutneighmax);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (need_ic) {
|
||||
hbnni<flt_t,acc_t,1>(1, list, buffers, 0, off_end);
|
||||
hbnni<flt_t,acc_t,1>(0, list, buffers, host_start, nlocal);
|
||||
} else {
|
||||
hbnni<flt_t,acc_t,0>(1, list, buffers, 0, off_end);
|
||||
hbnni<flt_t,acc_t,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
#else
|
||||
if (need_ic)
|
||||
hbnni<flt_t,acc_t,1>(0, list, buffers, host_start, nlocal);
|
||||
else
|
||||
hbnni<flt_t,acc_t,0>(0, list, buffers, host_start, nlocal);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t, int need_ic>
|
||||
void NPairHalfBinNewtoffIntel::
|
||||
hbnni(const int offload, NeighList *list, IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const int astart, const int aend) {
|
||||
|
||||
if (aend-astart == 0) return;
|
||||
|
||||
const int nall = atom->nlocal + atom->nghost;
|
||||
int pad = 1;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload) {
|
||||
if (INTEL_MIC_NBOR_PAD > 1)
|
||||
pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t);
|
||||
} else
|
||||
#endif
|
||||
if (INTEL_NBOR_PAD > 1)
|
||||
pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t);
|
||||
const int pad_width = pad;
|
||||
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
|
||||
const int molecular = atom->molecular;
|
||||
int *ns = NULL;
|
||||
tagint *s = NULL;
|
||||
int tag_size = 0, special_size;
|
||||
if (buffers->need_tag()) tag_size = nall;
|
||||
if (molecular) {
|
||||
s = atom->special[0];
|
||||
ns = atom->nspecial[0];
|
||||
special_size = aend;
|
||||
} else {
|
||||
s = &buffers->_special_holder;
|
||||
ns = &buffers->_nspecial_holder;
|
||||
special_size = 0;
|
||||
}
|
||||
const tagint * _noalias const special = s;
|
||||
const int * _noalias const nspecial = ns;
|
||||
const int maxspecial = atom->maxspecial;
|
||||
const tagint * _noalias const tag = atom->tag;
|
||||
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int nstencil = this->nstencil;
|
||||
const int * _noalias const stencil = this->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
int * const mask = atom->mask;
|
||||
tagint * const molecule = atom->molecule;
|
||||
#endif
|
||||
|
||||
int tnum;
|
||||
int *overflow;
|
||||
double *timer_compute;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload) {
|
||||
timer_compute = _fix->off_watch_neighbor();
|
||||
tnum = buffers->get_off_threads();
|
||||
overflow = _fix->get_off_overflow_flag();
|
||||
_fix->stop_watch(TIME_HOST_NEIGHBOR);
|
||||
_fix->start_watch(TIME_OFFLOAD_LATENCY);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
tnum = comm->nthreads;
|
||||
overflow = _fix->get_overflow_flag();
|
||||
}
|
||||
const int nthreads = tnum;
|
||||
const int maxnbors = buffers->get_max_nbors();
|
||||
int * _noalias const atombin = buffers->get_atombin();
|
||||
const int * _noalias const binpacked = buffers->get_binpacked();
|
||||
|
||||
const int xperiodic = domain->xperiodic;
|
||||
const int yperiodic = domain->yperiodic;
|
||||
const int zperiodic = domain->zperiodic;
|
||||
const flt_t xprd_half = domain->xprd_half;
|
||||
const flt_t yprd_half = domain->yprd_half;
|
||||
const flt_t zprd_half = domain->zprd_half;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
const int * _noalias const binhead = this->binhead;
|
||||
const int * _noalias const bins = this->bins;
|
||||
const int cop = _fix->coprocessor_number();
|
||||
const int separate_buffers = _fix->separate_buffers();
|
||||
#pragma offload target(mic:cop) if(offload) \
|
||||
in(x:length(nall+1) alloc_if(0) free_if(0)) \
|
||||
in(tag:length(tag_size) alloc_if(0) free_if(0)) \
|
||||
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
|
||||
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
|
||||
in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \
|
||||
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
|
||||
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
out(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ilist:length(0) alloc_if(0) free_if(0)) \
|
||||
in(atombin:length(aend) alloc_if(0) free_if(0)) \
|
||||
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
|
||||
in(maxnbors,nthreads,maxspecial,nstencil,pad_width,offload,nall) \
|
||||
in(separate_buffers, astart, aend, nlocal, molecular, ntypes) \
|
||||
in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \
|
||||
out(overflow:length(5) alloc_if(0) free_if(0)) \
|
||||
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
|
||||
signal(tag)
|
||||
#endif
|
||||
{
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime();
|
||||
#endif
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
overflow[LMP_LOCAL_MIN] = astart;
|
||||
overflow[LMP_LOCAL_MAX] = aend - 1;
|
||||
overflow[LMP_GHOST_MIN] = nall;
|
||||
overflow[LMP_GHOST_MAX] = -1;
|
||||
#endif
|
||||
|
||||
int nstencilp = 0;
|
||||
int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL];
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
binstart[nstencilp] = stencil[k];
|
||||
int end = stencil[k] + 1;
|
||||
for (int kk = k + 1; kk < nstencil; kk++) {
|
||||
if (stencil[kk-1]+1 == stencil[kk]) {
|
||||
end++;
|
||||
k++;
|
||||
} else break;
|
||||
}
|
||||
binend[nstencilp] = end;
|
||||
nstencilp++;
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) \
|
||||
shared(numneigh, overflow, nstencilp, binstart, binend)
|
||||
#endif
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int lmin = nall, lmax = -1, gmin = nall, gmax = -1;
|
||||
#endif
|
||||
|
||||
const int num = aend - astart;
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads);
|
||||
ifrom += astart;
|
||||
ito += astart;
|
||||
|
||||
int which;
|
||||
|
||||
const int list_size = (ito + tid + 1) * maxnbors;
|
||||
int ct = (ifrom + tid) * maxnbors;
|
||||
int *neighptr = firstneigh + ct;
|
||||
|
||||
for (int i = ifrom; i < ito; i++) {
|
||||
int j, k, n, n2, itype, jtype, ibin;
|
||||
double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
|
||||
|
||||
n = 0;
|
||||
n2 = maxnbors;
|
||||
|
||||
xtmp = x[i].x;
|
||||
ytmp = x[i].y;
|
||||
ztmp = x[i].z;
|
||||
itype = x[i].w;
|
||||
const int ioffset = ntypes*itype;
|
||||
|
||||
// loop over all atoms in other bins in stencil including self
|
||||
// only store pair if i < j
|
||||
// stores own/own pairs only once
|
||||
// stores own/ghost pairs on both procs
|
||||
|
||||
ibin = atombin[i];
|
||||
|
||||
for (k = 0; k < nstencilp; k++) {
|
||||
const int bstart = binhead[ibin + binstart[k]];
|
||||
const int bend = binhead[ibin + binend[k]];
|
||||
for (int jj = bstart; jj < bend; jj++) {
|
||||
const int j = binpacked[jj];
|
||||
if (j <= i) continue;
|
||||
|
||||
jtype = x[j].w;
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
#endif
|
||||
|
||||
delx = xtmp - x[j].x;
|
||||
dely = ytmp - x[j].y;
|
||||
delz = ztmp - x[j].z;
|
||||
rsq = delx * delx + dely * dely + delz * delz;
|
||||
if (rsq <= cutneighsq[ioffset + jtype]) {
|
||||
if (j < nlocal) {
|
||||
if (need_ic) {
|
||||
int no_special;
|
||||
ominimum_image_check(no_special, delx, dely, delz);
|
||||
if (no_special)
|
||||
neighptr[n++] = -j - 1;
|
||||
else
|
||||
neighptr[n++] = j;
|
||||
} else
|
||||
neighptr[n++] = j;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j < lmin) lmin = j;
|
||||
if (j > lmax) lmax = j;
|
||||
#endif
|
||||
} else {
|
||||
if (need_ic) {
|
||||
int no_special;
|
||||
ominimum_image_check(no_special, delx, dely, delz);
|
||||
if (no_special)
|
||||
neighptr[n2++] = -j - 1;
|
||||
else
|
||||
neighptr[n2++] = j;
|
||||
} else
|
||||
neighptr[n2++] = j;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j < gmin) gmin = j;
|
||||
if (j > gmax) gmax = j;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ilist[i] = i;
|
||||
|
||||
cnumneigh[i] = ct;
|
||||
if (n > maxnbors) *overflow = 1;
|
||||
for (k = maxnbors; k < n2; k++) neighptr[n++] = neighptr[k];
|
||||
|
||||
const int edge = (n % pad_width);
|
||||
if (edge) {
|
||||
const int pad_end = n + (pad_width - edge);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma loop_count min=1, max=15, avg=8
|
||||
#endif
|
||||
for ( ; n < pad_end; n++)
|
||||
neighptr[n] = nall;
|
||||
}
|
||||
numneigh[i] = n;
|
||||
while((n % (INTEL_DATA_ALIGN / sizeof(int))) != 0) n++;
|
||||
ct += n;
|
||||
neighptr += n;
|
||||
if (ct + n + maxnbors > list_size) {
|
||||
*overflow = 1;
|
||||
ct = (ifrom + tid) * maxnbors;
|
||||
}
|
||||
}
|
||||
|
||||
if (*overflow == 1)
|
||||
for (int i = ifrom; i < ito; i++)
|
||||
numneigh[i] = 0;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (separate_buffers) {
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp critical
|
||||
#endif
|
||||
{
|
||||
if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin;
|
||||
if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax;
|
||||
if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin;
|
||||
if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax;
|
||||
}
|
||||
#pragma omp barrier
|
||||
}
|
||||
|
||||
int ghost_offset = 0, nall_offset = nall;
|
||||
if (separate_buffers) {
|
||||
int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
|
||||
if (nghost < 0) nghost = 0;
|
||||
if (offload) {
|
||||
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
|
||||
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
|
||||
} else {
|
||||
ghost_offset = overflow[LMP_GHOST_MIN] - nlocal;
|
||||
nall_offset = nlocal + nghost;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (molecular) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj];
|
||||
if (need_ic && j < 0) {
|
||||
which = 0;
|
||||
jlist[jj] = -j - 1;
|
||||
} else
|
||||
ofind_special(which, special, nspecial, i, tag[j]);
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j >= nlocal) {
|
||||
if (j == nall)
|
||||
jlist[jj] = nall_offset;
|
||||
else if (which)
|
||||
jlist[jj] = (j-ghost_offset) ^ (which << SBBITS);
|
||||
else jlist[jj]-=ghost_offset;
|
||||
} else
|
||||
#endif
|
||||
if (which) jlist[jj] = j ^ (which << SBBITS);
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
int jj = 0;
|
||||
for (jj = 0; jj < jnum; jj++)
|
||||
if (jlist[jj] >= nlocal) break;
|
||||
while (jj < jnum) {
|
||||
if (jlist[jj] == nall) jlist[jj] = nall_offset;
|
||||
else jlist[jj] -= ghost_offset;
|
||||
jj++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} // end omp
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime() - *timer_compute;
|
||||
#endif
|
||||
} // end offload
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload) {
|
||||
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
|
||||
_fix->start_watch(TIME_HOST_NEIGHBOR);
|
||||
for (int n = 0; n < aend; n++) {
|
||||
ilist[n] = n;
|
||||
numneigh[n] = 0;
|
||||
}
|
||||
} else {
|
||||
for (int i = astart; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
if (separate_buffers) {
|
||||
_fix->start_watch(TIME_PACK);
|
||||
_fix->set_neighbor_host_sizes();
|
||||
buffers->pack_sep_from_single(_fix->host_min_local(),
|
||||
_fix->host_used_local(),
|
||||
_fix->host_min_ghost(),
|
||||
_fix->host_used_ghost());
|
||||
_fix->stop_watch(TIME_PACK);
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (int i = astart; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
#endif
|
||||
}
|
||||
52
src/USER-INTEL/npair_half_bin_newtoff_intel.h
Normal file
52
src/USER-INTEL/npair_half_bin_newtoff_intel.h
Normal file
@ -0,0 +1,52 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
NPairStyle(half/bin/newtoff/intel,
|
||||
NPairHalfBinNewtoffIntel,
|
||||
NP_HALF | NP_BIN | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_INTEL)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALF_BIN_NEWTOFF_INTEL_H
|
||||
#define LMP_NPAIR_HALF_BIN_NEWTOFF_INTEL_H
|
||||
|
||||
#include "npair_intel.h"
|
||||
#include "fix_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairHalfBinNewtoffIntel : public NPairIntel {
|
||||
public:
|
||||
NPairHalfBinNewtoffIntel(class LAMMPS *);
|
||||
~NPairHalfBinNewtoffIntel() {}
|
||||
void build(class NeighList *);
|
||||
|
||||
private:
|
||||
template <class flt_t, class acc_t>
|
||||
void hbnni(NeighList *, IntelBuffers<flt_t,acc_t> *);
|
||||
template <class flt_t, class acc_t, int>
|
||||
void hbnni(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, const int,
|
||||
const int);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
|
||||
*/
|
||||
610
src/USER-INTEL/npair_half_bin_newton_intel.cpp
Normal file
610
src/USER-INTEL/npair_half_bin_newton_intel.cpp
Normal file
@ -0,0 +1,610 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_half_bin_newton_intel.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "group.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// constructor: forwards the LAMMPS pointer to the NPairIntel base class
// and performs no work of its own

NPairHalfBinNewtonIntel::NPairHalfBinNewtonIntel(LAMMPS *lmp)
  : NPairIntel(lmp)
{
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with full Newton's 3rd law
|
||||
each owned atom i checks its own bin and other bins in Newton stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairHalfBinNewtonIntel::build(NeighList *list)
{
  // refuse stencils larger than the compile-time limit

  if (nstencil / 2 > INTEL_MAX_STENCIL_CHECK) {
    error->all(FLERR, "Too many neighbor bins for USER-INTEL package.");
  }

  // exclusions are rejected when compiled for offload

#ifdef _LMP_INTEL_OFFLOAD
  if (exclude) {
    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
  }
#endif

  // dispatch on the precision mode reported by the fix:
  // mixed, double, or (anything else) single

  if (_fix->precision() == FixIntel::PREC_MODE_MIXED) {
    hbni(list, _fix->get_mixed_buffers());
  } else {
    if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
      hbni(list, _fix->get_double_buffers());
    } else {
      hbni(list, _fix->get_single_buffers());
    }
  }

  _fix->stop_watch(TIME_HOST_NEIGHBOR);
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairHalfBinNewtonIntel::
|
||||
hbni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
list->inum = nlocal;
|
||||
|
||||
int host_start = _fix->host_start_neighbor();
|
||||
const int off_end = _fix->offload_end_neighbor();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (off_end) grow_stencil();
|
||||
if (_fix->full_host_list()) host_start = 0;
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end);
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular)
|
||||
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
|
||||
neighbor->cutneighmax);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (need_ic) {
|
||||
if (offload_noghost) {
|
||||
hbni<flt_t,acc_t,1,1>(1, list, buffers, 0, off_end);
|
||||
hbni<flt_t,acc_t,1,1>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
hbni<flt_t,acc_t,0,1>(1, list, buffers, 0, off_end);
|
||||
hbni<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
} else {
|
||||
if (offload_noghost) {
|
||||
hbni<flt_t,acc_t,1,0>(1, list, buffers, 0, off_end);
|
||||
hbni<flt_t,acc_t,1,0>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
hbni<flt_t,acc_t,0,0>(1, list, buffers, 0, off_end);
|
||||
hbni<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (need_ic)
|
||||
hbni<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal);
|
||||
else
|
||||
hbni<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
   worker that builds the half neighbor list for atoms astart <= i < aend
   offload != 0 selects the coprocessor settings (thread count, overflow
     flag, timer, padding); offload == 0 selects the host settings
   offload_noghost (offload builds only): when set, the offload pass skips
     ghost atoms j, and owned atoms j are skipped for atoms i < offload_end
   need_ic: when set, pairs flagged by ominimum_image_check are stored as
     -j-1 so that the special-bond lookup is bypassed for them
   results are written to ilist, numneigh, cnumneigh and the firstneigh
     storage of buffers; *overflow is set to 1 when list space runs out
------------------------------------------------------------------------- */
template <class flt_t, class acc_t, int offload_noghost, int need_ic>
|
||||
void NPairHalfBinNewtonIntel::
|
||||
hbni(const int offload, NeighList *list, IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const int astart, const int aend, const int offload_end) {
|
||||
|
||||
// nothing to do for an empty atom range
if (aend-astart == 0) return;
|
||||
|
||||
const int nall = atom->nlocal + atom->nghost;
|
||||
// pad: each atom's neighbor count is rounded up to a multiple of this
// value in the padding loop further down (non-OUTER_CHUNK builds)
int pad = 1;
|
||||
int nall_t = nall;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_noghost && offload) nall_t = atom->nlocal;
|
||||
if (offload) {
|
||||
if (INTEL_MIC_NBOR_PAD > 1)
|
||||
pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t);
|
||||
} else
|
||||
#endif
|
||||
if (INTEL_NBOR_PAD > 1)
|
||||
pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t);
|
||||
const int pad_width = pad;
|
||||
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int e_nall = nall_t;
|
||||
|
||||
const int molecular = atom->molecular;
|
||||
// non-molecular systems point special/nspecial at placeholder members of
// buffers and use special_size = 0 for the transfer lengths
int *ns = NULL;
|
||||
tagint *s = NULL;
|
||||
int tag_size = 0, special_size;
|
||||
if (buffers->need_tag()) tag_size = e_nall;
|
||||
if (molecular) {
|
||||
s = atom->special[0];
|
||||
ns = atom->nspecial[0];
|
||||
special_size = aend;
|
||||
} else {
|
||||
s = &buffers->_special_holder;
|
||||
ns = &buffers->_nspecial_holder;
|
||||
special_size = 0;
|
||||
}
|
||||
const tagint * _noalias const special = s;
|
||||
const int * _noalias const nspecial = ns;
|
||||
const int maxspecial = atom->maxspecial;
|
||||
const tagint * _noalias const tag = atom->tag;
|
||||
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int nstencil = this->nstencil;
|
||||
const int * _noalias const stencil = this->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
int * const mask = atom->mask;
|
||||
tagint * const molecule = atom->molecule;
|
||||
#endif
|
||||
|
||||
// thread count and overflow flag come from the offload or the host side
int tnum;
|
||||
int *overflow;
|
||||
double *timer_compute;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload) {
|
||||
timer_compute = _fix->off_watch_neighbor();
|
||||
tnum = buffers->get_off_threads();
|
||||
overflow = _fix->get_off_overflow_flag();
|
||||
_fix->stop_watch(TIME_HOST_NEIGHBOR);
|
||||
_fix->start_watch(TIME_OFFLOAD_LATENCY);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
tnum = comm->nthreads;
|
||||
overflow = _fix->get_overflow_flag();
|
||||
}
|
||||
const int nthreads = tnum;
|
||||
const int maxnbors = buffers->get_max_nbors();
|
||||
int * _noalias const atombin = buffers->get_atombin();
|
||||
const int * _noalias const binpacked = buffers->get_binpacked();
|
||||
|
||||
const int xperiodic = domain->xperiodic;
|
||||
const int yperiodic = domain->yperiodic;
|
||||
const int zperiodic = domain->zperiodic;
|
||||
const flt_t xprd_half = domain->xprd_half;
|
||||
const flt_t yprd_half = domain->yprd_half;
|
||||
const flt_t zprd_half = domain->zprd_half;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
const int * _noalias const binhead = this->binhead;
|
||||
const int * _noalias const bins = this->bins;
|
||||
const int cop = _fix->coprocessor_number();
|
||||
const int separate_buffers = _fix->separate_buffers();
|
||||
#pragma offload target(mic:cop) if(offload) \
|
||||
in(x:length(e_nall+1) alloc_if(0) free_if(0)) \
|
||||
in(tag:length(tag_size) alloc_if(0) free_if(0)) \
|
||||
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
|
||||
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
|
||||
in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \
|
||||
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
|
||||
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
out(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ilist:length(0) alloc_if(0) free_if(0)) \
|
||||
in(atombin:length(aend) alloc_if(0) free_if(0)) \
|
||||
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
|
||||
in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload,pad_width) \
|
||||
in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \
|
||||
in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \
|
||||
out(overflow:length(5) alloc_if(0) free_if(0)) \
|
||||
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
|
||||
signal(tag)
|
||||
#endif
|
||||
{
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime();
|
||||
#endif
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
overflow[LMP_LOCAL_MIN] = astart;
|
||||
overflow[LMP_LOCAL_MAX] = aend - 1;
|
||||
overflow[LMP_GHOST_MIN] = e_nall;
|
||||
overflow[LMP_GHOST_MAX] = -1;
|
||||
#endif
|
||||
|
||||
// merge runs of consecutive stencil offsets into [binstart,binend) pairs;
// each pair is later scanned as one range of binpacked via binhead
int nstencilp = 0;
|
||||
int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL];
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
binstart[nstencilp] = stencil[k];
|
||||
int end = stencil[k] + 1;
|
||||
for (int kk = k + 1; kk < nstencil; kk++) {
|
||||
if (stencil[kk-1]+1 == stencil[kk]) {
|
||||
end++;
|
||||
k++;
|
||||
} else break;
|
||||
}
|
||||
binend[nstencilp] = end;
|
||||
nstencilp++;
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) \
|
||||
shared(numneigh, overflow, nstencilp, binstart, binend)
|
||||
#endif
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1;
|
||||
#endif
|
||||
|
||||
const int num = aend - astart;
|
||||
int tid, ifrom, ito;
|
||||
|
||||
// NOTE(review): in the OUTER_CHUNK branch ito already includes astart
// when it is compared with num, which does not; confirm the comparison
// is meant to detect the range that ends at aend
#ifdef OUTER_CHUNK
|
||||
const int swidth = ip_simd::SIMD_type<flt_t>::width();
|
||||
IP_PRE_omp_range_id_vec(ifrom, ito, tid, num, nthreads, swidth);
|
||||
ifrom += astart;
|
||||
ito += astart;
|
||||
int e_ito = ito;
|
||||
if (ito == num) {
|
||||
int imod = ito % swidth;
|
||||
if (imod) e_ito += swidth - imod;
|
||||
}
|
||||
const int list_size = (e_ito + tid * 2 + 2) * maxnbors;
|
||||
#else
|
||||
const int swidth = 1;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads);
|
||||
ifrom += astart;
|
||||
ito += astart;
|
||||
const int list_size = (ito + tid * 2 + 2) * maxnbors;
|
||||
#endif
|
||||
|
||||
int which;
|
||||
|
||||
int pack_offset = maxnbors * swidth;
|
||||
int ct = (ifrom + tid * 2) * maxnbors;
|
||||
int *neighptr = firstneigh + ct;
|
||||
const int obound = pack_offset + maxnbors * 2;
|
||||
|
||||
int max_chunk = 0;
|
||||
int lane = 0;
|
||||
for (int i = ifrom; i < ito; i++) {
|
||||
const flt_t xtmp = x[i].x;
|
||||
const flt_t ytmp = x[i].y;
|
||||
const flt_t ztmp = x[i].z;
|
||||
const int itype = x[i].w;
|
||||
const int ioffset = ntypes * itype;
|
||||
|
||||
// loop over rest of atoms in i's bin, ghosts are at end of linked list
|
||||
// if j is owned atom, store it, since j is beyond i in linked list
|
||||
// if j is ghost, only store if j coords are "above/to the right" of i
|
||||
|
||||
// candidates are staged in neighptr starting at index pack_offset
int raw_count = pack_offset;
|
||||
for (int j = bins[i]; j >= 0; j = bins[j]) {
|
||||
if (j >= nlocal) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_noghost && offload) continue;
|
||||
#endif
|
||||
if (x[j].z < ztmp) continue;
|
||||
if (x[j].z == ztmp) {
|
||||
if (x[j].y < ytmp) continue;
|
||||
if (x[j].y == ytmp && x[j].x < xtmp) continue;
|
||||
}
|
||||
}
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (offload_noghost && i < offload_end) continue;
|
||||
#endif
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
if (exclude) {
|
||||
const int jtype = x[j].w;
|
||||
if (exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
neighptr[raw_count++] = j;
|
||||
}
|
||||
|
||||
// loop over all atoms in other bins in stencil, store every pair
|
||||
|
||||
const int ibin = atombin[i];
|
||||
if (exclude) {
|
||||
for (int k = 0; k < nstencilp; k++) {
|
||||
const int bstart = binhead[ibin + binstart[k]];
|
||||
const int bend = binhead[ibin + binend[k]];
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
#ifdef INTEL_VMASK
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for (int jj = bstart; jj < bend; jj++) {
|
||||
const int j = binpacked[jj];
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_noghost) {
|
||||
if (j < nlocal) {
|
||||
if (i < offload_end) continue;
|
||||
} else if (offload) continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
const int jtype = x[j].w;
|
||||
if (exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
#endif
|
||||
|
||||
neighptr[raw_count++] = j;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int k = 0; k < nstencilp; k++) {
|
||||
const int bstart = binhead[ibin + binstart[k]];
|
||||
const int bend = binhead[ibin + binend[k]];
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
#ifdef INTEL_VMASK
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for (int jj = bstart; jj < bend; jj++) {
|
||||
const int j = binpacked[jj];
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_noghost) {
|
||||
if (j < nlocal) {
|
||||
if (i < offload_end) continue;
|
||||
} else if (offload) continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
neighptr[raw_count++] = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// staged more candidates than obound allows: flag overflow
if (raw_count > obound) *overflow = 1;
|
||||
|
||||
// distance filter over the staged candidates: pairs beyond the cutoff are
// overwritten with the sentinel e_nall; with need_ic, pairs for which
// ominimum_image_check sets no_special are overwritten with -j-1
#if defined(LMP_SIMD_COMPILER)
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int vlmin = lmin, vlmax = lmax, vgmin = gmin, vgmax = gmax;
|
||||
#if __INTEL_COMPILER+0 > 1499
|
||||
#pragma vector aligned
|
||||
#pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin)
|
||||
#endif
|
||||
#else
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for (int u = pack_offset; u < raw_count; u++) {
|
||||
int j = neighptr[u];
|
||||
const flt_t delx = xtmp - x[j].x;
|
||||
const flt_t dely = ytmp - x[j].y;
|
||||
const flt_t delz = ztmp - x[j].z;
|
||||
const int jtype = x[j].w;
|
||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||
if (rsq > cutneighsq[ioffset + jtype])
|
||||
neighptr[u] = e_nall;
|
||||
else {
|
||||
if (need_ic) {
|
||||
int no_special;
|
||||
ominimum_image_check(no_special, delx, dely, delz);
|
||||
if (no_special)
|
||||
neighptr[u] = -j - 1;
|
||||
}
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j < nlocal) {
|
||||
if (j < vlmin) vlmin = j;
|
||||
if (j > vlmax) vlmax = j;
|
||||
} else {
|
||||
if (j < vgmin) vgmin = j;
|
||||
if (j > vgmax) vgmax = j;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
lmin = MIN(lmin,vlmin);
|
||||
gmin = MIN(gmin,vgmin);
|
||||
lmax = MAX(lmax,vlmax);
|
||||
gmax = MAX(gmax,vgmax);
|
||||
#endif
|
||||
|
||||
// compact the survivors: entries with index < nlocal are written from
// position lane with stride swidth, the others are gathered at
// pack_offset and appended after them by the second loop
int n = lane, n2 = pack_offset;
|
||||
for (int u = pack_offset; u < raw_count; u++) {
|
||||
const int j = neighptr[u];
|
||||
int pj = j;
|
||||
if (pj < e_nall) {
|
||||
if (need_ic)
|
||||
if (pj < 0) pj = -pj - 1;
|
||||
|
||||
if (pj < nlocal) {
|
||||
neighptr[n] = j;
|
||||
n += swidth;
|
||||
} else
|
||||
neighptr[n2++] = j;
|
||||
}
|
||||
}
|
||||
int ns = (n - lane) / swidth;
|
||||
for (int u = pack_offset; u < n2; u++) {
|
||||
neighptr[n] = neighptr[u];
|
||||
n += swidth;
|
||||
}
|
||||
|
||||
ilist[i] = i;
|
||||
cnumneigh[i] = ct + lane;
|
||||
ns += n2 - pack_offset;
|
||||
#ifndef OUTER_CHUNK
|
||||
int edge = (ns % pad_width);
|
||||
if (edge) {
|
||||
const int pad_end = ns + (pad_width - edge);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma loop_count min=1, max=15, avg=8
|
||||
#endif
|
||||
for ( ; ns < pad_end; ns++)
|
||||
neighptr[ns] = e_nall;
|
||||
}
|
||||
#endif
|
||||
numneigh[i] = ns;
|
||||
|
||||
#ifdef OUTER_CHUNK
|
||||
if (ns > max_chunk) max_chunk = ns;
|
||||
lane++;
|
||||
if (lane == swidth) {
|
||||
ct += max_chunk * swidth;
|
||||
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
|
||||
int edge = (ct % alignb);
|
||||
if (edge) ct += alignb - edge;
|
||||
neighptr = firstneigh + ct;
|
||||
max_chunk = 0;
|
||||
pack_offset = maxnbors * swidth;
|
||||
lane = 0;
|
||||
if (ct + obound > list_size) {
|
||||
if (i < ito - 1) {
|
||||
*overflow = 1;
|
||||
ct = (ifrom + tid * 2) * maxnbors;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
ct += ns;
|
||||
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
|
||||
edge = (ct % alignb);
|
||||
if (edge) ct += alignb - edge;
|
||||
neighptr = firstneigh + ct;
|
||||
if (ct + obound > list_size) {
|
||||
if (i < ito - 1) {
|
||||
*overflow = 1;
|
||||
ct = (ifrom + tid * 2) * maxnbors;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// on overflow, zero the neighbor counts of this thread's atoms
if (*overflow == 1)
|
||||
for (int i = ifrom; i < ito; i++)
|
||||
numneigh[i] = 0;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (separate_buffers) {
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp critical
|
||||
#endif
|
||||
{
|
||||
if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin;
|
||||
if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax;
|
||||
if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin;
|
||||
if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax;
|
||||
}
|
||||
#pragma omp barrier
|
||||
}
|
||||
|
||||
int ghost_offset = 0, nall_offset = e_nall;
|
||||
if (separate_buffers) {
|
||||
int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
|
||||
if (nghost < 0) nghost = 0;
|
||||
if (offload) {
|
||||
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
|
||||
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
|
||||
} else {
|
||||
ghost_offset = overflow[LMP_GHOST_MIN] - nlocal;
|
||||
nall_offset = nlocal + nghost;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// molecular systems: entries stored as -j-1 are restored to j with
// which = 0; for all others ofind_special supplies which, and a nonzero
// which is XORed into the stored index as which << SBBITS
if (molecular) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
#ifndef OUTER_CHUNK
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
#else
|
||||
const int trip = jnum * swidth;
|
||||
for (int jj = 0; jj < trip; jj+= swidth) {
|
||||
#endif
|
||||
const int j = jlist[jj];
|
||||
if (need_ic && j < 0) {
|
||||
which = 0;
|
||||
jlist[jj] = -j - 1;
|
||||
} else
|
||||
ofind_special(which, special, nspecial, i, tag[j]);
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j >= nlocal) {
|
||||
if (j == e_nall)
|
||||
jlist[jj] = nall_offset;
|
||||
else if (which)
|
||||
jlist[jj] = (j-ghost_offset) ^ (which << SBBITS);
|
||||
else jlist[jj]-=ghost_offset;
|
||||
} else
|
||||
#endif
|
||||
if (which) jlist[jj] = j ^ (which << SBBITS);
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
int jj = 0;
|
||||
for (jj = 0; jj < jnum; jj++)
|
||||
if (jlist[jj] >= nlocal) break;
|
||||
while (jj < jnum) {
|
||||
if (jlist[jj] == e_nall) jlist[jj] = nall_offset;
|
||||
else jlist[jj] -= ghost_offset;
|
||||
jj++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} // end omp
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime() - *timer_compute;
|
||||
#endif
|
||||
} // end offload
|
||||
|
||||
// after an offload pass, reset ilist/numneigh on the host for atoms
// below aend; after a host pass, store the per-atom list pointers
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload) {
|
||||
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
|
||||
_fix->start_watch(TIME_HOST_NEIGHBOR);
|
||||
for (int n = 0; n < aend; n++) {
|
||||
ilist[n] = n;
|
||||
numneigh[n] = 0;
|
||||
}
|
||||
} else {
|
||||
for (int i = astart; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
if (separate_buffers) {
|
||||
_fix->start_watch(TIME_PACK);
|
||||
_fix->set_neighbor_host_sizes();
|
||||
buffers->pack_sep_from_single(_fix->host_min_local(),
|
||||
_fix->host_used_local(),
|
||||
_fix->host_min_ghost(),
|
||||
_fix->host_used_ghost());
|
||||
_fix->stop_watch(TIME_PACK);
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (int i = astart; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
#endif
|
||||
}
|
||||
51
src/USER-INTEL/npair_half_bin_newton_intel.h
Normal file
51
src/USER-INTEL/npair_half_bin_newton_intel.h
Normal file
@ -0,0 +1,51 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
NPairStyle(half/bin/newton/intel,
|
||||
NPairHalfBinNewtonIntel,
|
||||
NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_INTEL)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALF_BIN_NEWTON_INTEL_H
|
||||
#define LMP_NPAIR_HALF_BIN_NEWTON_INTEL_H
|
||||
|
||||
#include "npair_intel.h"
|
||||
#include "fix_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairHalfBinNewtonIntel : public NPairIntel {
|
||||
public:
|
||||
NPairHalfBinNewtonIntel(class LAMMPS *);
|
||||
~NPairHalfBinNewtonIntel() {}
|
||||
void build(class NeighList *);
|
||||
|
||||
private:
|
||||
template <class flt_t, class acc_t>
|
||||
void hbni(NeighList *, IntelBuffers<flt_t,acc_t> *);
|
||||
template <class flt_t, class acc_t, int, int>
|
||||
void hbni(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, const int,
|
||||
const int, const int offload_end = 0);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
513
src/USER-INTEL/npair_half_bin_newton_tri_intel.cpp
Normal file
513
src/USER-INTEL/npair_half_bin_newton_tri_intel.cpp
Normal file
@ -0,0 +1,513 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_half_bin_newton_tri_intel.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "group.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// constructor: forwards the LAMMPS pointer to the NPairIntel base class
// and performs no work of its own

NPairHalfBinNewtonTriIntel::NPairHalfBinNewtonTriIntel(LAMMPS *lmp)
  : NPairIntel(lmp)
{
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with Newton's 3rd law for triclinic
|
||||
each owned atom i checks its own bin and other bins in triclinic stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairHalfBinNewtonTriIntel::build(NeighList *list)
{
  // refuse stencils larger than the compile-time limit

  if (nstencil > INTEL_MAX_STENCIL) {
    error->all(FLERR, "Too many neighbor bins for USER-INTEL package.");
  }

  // exclusions are rejected when compiled for offload

#ifdef _LMP_INTEL_OFFLOAD
  if (exclude) {
    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
  }
#endif

  // dispatch on the precision mode reported by the fix:
  // mixed, double, or (anything else) single

  if (_fix->precision() == FixIntel::PREC_MODE_MIXED) {
    hbnti(list, _fix->get_mixed_buffers());
  } else {
    if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
      hbnti(list, _fix->get_double_buffers());
    } else {
      hbnti(list, _fix->get_single_buffers());
    }
  }

  _fix->stop_watch(TIME_HOST_NEIGHBOR);
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairHalfBinNewtonTriIntel::
|
||||
hbnti(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
list->inum = nlocal;
|
||||
|
||||
int host_start = _fix->host_start_neighbor();
|
||||
const int off_end = _fix->offload_end_neighbor();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (off_end) grow_stencil();
|
||||
if (_fix->full_host_list()) host_start = 0;
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end);
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular)
|
||||
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
|
||||
neighbor->cutneighmax);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (need_ic) {
|
||||
if (offload_noghost) {
|
||||
hbnti<flt_t,acc_t,1,1>(1, list, buffers, 0, off_end);
|
||||
hbnti<flt_t,acc_t,1,1>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
hbnti<flt_t,acc_t,0,1>(1, list, buffers, 0, off_end);
|
||||
hbnti<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
} else {
|
||||
if (offload_noghost) {
|
||||
hbnti<flt_t,acc_t,1,0>(1, list, buffers, 0, off_end);
|
||||
hbnti<flt_t,acc_t,1,0>(0, list, buffers, host_start, nlocal, off_end);
|
||||
} else {
|
||||
hbnti<flt_t,acc_t,0,0>(1, list, buffers, 0, off_end);
|
||||
hbnti<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (need_ic)
|
||||
hbnti<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal);
|
||||
else
|
||||
hbnti<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t, int offload_noghost, int need_ic>
|
||||
void NPairHalfBinNewtonTriIntel::
|
||||
hbnti(const int offload, NeighList *list, IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const int astart, const int aend, const int offload_end) {
|
||||
if (aend-astart == 0) return;
|
||||
|
||||
const int nall = atom->nlocal + atom->nghost;
|
||||
int pad = 1;
|
||||
int nall_t = nall;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_noghost && offload) nall_t = atom->nlocal;
|
||||
if (offload) {
|
||||
if (INTEL_MIC_NBOR_PAD > 1)
|
||||
pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t);
|
||||
} else
|
||||
#endif
|
||||
if (INTEL_NBOR_PAD > 1)
|
||||
pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t);
|
||||
const int pad_width = pad;
|
||||
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int e_nall = nall_t;
|
||||
|
||||
const int molecular = atom->molecular;
|
||||
int *ns = NULL;
|
||||
tagint *s = NULL;
|
||||
int tag_size = 0, special_size;
|
||||
if (buffers->need_tag()) tag_size = e_nall;
|
||||
if (molecular) {
|
||||
s = atom->special[0];
|
||||
ns = atom->nspecial[0];
|
||||
special_size = aend;
|
||||
} else {
|
||||
s = &buffers->_special_holder;
|
||||
ns = &buffers->_nspecial_holder;
|
||||
special_size = 0;
|
||||
}
|
||||
const tagint * _noalias const special = s;
|
||||
const int * _noalias const nspecial = ns;
|
||||
const int maxspecial = atom->maxspecial;
|
||||
const tagint * _noalias const tag = atom->tag;
|
||||
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int nstencil = this->nstencil;
|
||||
const int * _noalias const stencil = this->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
int * const mask = atom->mask;
|
||||
tagint * const molecule = atom->molecule;
|
||||
#endif
|
||||
|
||||
int tnum;
|
||||
int *overflow;
|
||||
double *timer_compute;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload) {
|
||||
timer_compute = _fix->off_watch_neighbor();
|
||||
tnum = buffers->get_off_threads();
|
||||
overflow = _fix->get_off_overflow_flag();
|
||||
_fix->stop_watch(TIME_HOST_NEIGHBOR);
|
||||
_fix->start_watch(TIME_OFFLOAD_LATENCY);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
tnum = comm->nthreads;
|
||||
overflow = _fix->get_overflow_flag();
|
||||
}
|
||||
const int nthreads = tnum;
|
||||
const int maxnbors = buffers->get_max_nbors();
|
||||
int * _noalias const atombin = buffers->get_atombin();
|
||||
const int * _noalias const binpacked = buffers->get_binpacked();
|
||||
|
||||
const int xperiodic = domain->xperiodic;
|
||||
const int yperiodic = domain->yperiodic;
|
||||
const int zperiodic = domain->zperiodic;
|
||||
const flt_t xprd_half = domain->xprd_half;
|
||||
const flt_t yprd_half = domain->yprd_half;
|
||||
const flt_t zprd_half = domain->zprd_half;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
const int * _noalias const binhead = this->binhead;
|
||||
const int * _noalias const bins = this->bins;
|
||||
const int cop = _fix->coprocessor_number();
|
||||
const int separate_buffers = _fix->separate_buffers();
|
||||
#pragma offload target(mic:cop) if(offload) \
|
||||
in(x:length(e_nall+1) alloc_if(0) free_if(0)) \
|
||||
in(tag:length(tag_size) alloc_if(0) free_if(0)) \
|
||||
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
|
||||
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
|
||||
in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \
|
||||
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
|
||||
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
out(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ilist:length(0) alloc_if(0) free_if(0)) \
|
||||
in(atombin:length(aend) alloc_if(0) free_if(0)) \
|
||||
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
|
||||
in(maxnbors,nthreads,maxspecial,nstencil,offload_end,pad_width,e_nall) \
|
||||
in(offload,separate_buffers, astart, aend, nlocal, molecular, ntypes) \
|
||||
in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \
|
||||
out(overflow:length(5) alloc_if(0) free_if(0)) \
|
||||
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
|
||||
signal(tag)
|
||||
#endif
|
||||
{
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime();
|
||||
#endif
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
overflow[LMP_LOCAL_MIN] = astart;
|
||||
overflow[LMP_LOCAL_MAX] = aend - 1;
|
||||
overflow[LMP_GHOST_MIN] = e_nall;
|
||||
overflow[LMP_GHOST_MAX] = -1;
|
||||
#endif
|
||||
|
||||
int nstencilp = 0;
|
||||
int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL];
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
binstart[nstencilp] = stencil[k];
|
||||
int end = stencil[k] + 1;
|
||||
for (int kk = k + 1; kk < nstencil; kk++) {
|
||||
if (stencil[kk-1]+1 == stencil[kk]) {
|
||||
end++;
|
||||
k++;
|
||||
} else break;
|
||||
}
|
||||
binend[nstencilp] = end;
|
||||
nstencilp++;
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) \
|
||||
shared(numneigh, overflow, nstencilp, binstart, binend)
|
||||
#endif
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1;
|
||||
#endif
|
||||
|
||||
const int num = aend - astart;
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads);
|
||||
ifrom += astart;
|
||||
ito += astart;
|
||||
|
||||
int which;
|
||||
|
||||
const int list_size = (ito + tid * 2 + 2) * maxnbors;
|
||||
int ct = (ifrom + tid * 2) * maxnbors;
|
||||
int *neighptr = firstneigh + ct;
|
||||
const int obound = maxnbors * 3;
|
||||
|
||||
for (int i = ifrom; i < ito; i++) {
|
||||
const flt_t xtmp = x[i].x;
|
||||
const flt_t ytmp = x[i].y;
|
||||
const flt_t ztmp = x[i].z;
|
||||
const int itype = x[i].w;
|
||||
const int ioffset = ntypes * itype;
|
||||
|
||||
// loop over all atoms in bins in stencil
|
||||
// pairs for atoms j "below" i are excluded
|
||||
// below = lower z or (equal z and lower y) or (equal zy and lower x)
|
||||
// (equal zyx and j <= i)
|
||||
// latter excludes self-self interaction but allows superposed atoms
|
||||
|
||||
const int ibin = atombin[i];
|
||||
|
||||
int raw_count = maxnbors;
|
||||
for (int k = 0; k < nstencilp; k++) {
|
||||
const int bstart = binhead[ibin + binstart[k]];
|
||||
const int bend = binhead[ibin + binend[k]];
|
||||
for (int jj = bstart; jj < bend; jj++) {
|
||||
const int j = binpacked[jj];
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload_noghost) {
|
||||
if (j < nlocal) {
|
||||
if (i < offload_end) continue;
|
||||
} else if (offload) continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (x[j].z < ztmp) continue;
|
||||
if (x[j].z == ztmp) {
|
||||
if (x[j].y < ytmp) continue;
|
||||
if (x[j].y == ytmp) {
|
||||
if (x[j].x < xtmp) continue;
|
||||
if (x[j].x == xtmp && j <= i) continue;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
if (exclude) {
|
||||
const int jtype = x[j].w;
|
||||
if (exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
neighptr[raw_count++] = j;
|
||||
}
|
||||
}
|
||||
if (raw_count > obound)
|
||||
*overflow = 1;
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int vlmin = lmin, vlmax = lmax, vgmin = gmin, vgmax = gmax;
|
||||
#if __INTEL_COMPILER+0 > 1499
|
||||
#pragma vector aligned
|
||||
#pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin)
|
||||
#endif
|
||||
#else
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for (int u = maxnbors; u < raw_count; u++) {
|
||||
int j = neighptr[u];
|
||||
const flt_t delx = xtmp - x[j].x;
|
||||
const flt_t dely = ytmp - x[j].y;
|
||||
const flt_t delz = ztmp - x[j].z;
|
||||
const int jtype = x[j].w;
|
||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||
if (rsq > cutneighsq[ioffset + jtype])
|
||||
neighptr[u] = e_nall;
|
||||
else {
|
||||
if (need_ic) {
|
||||
int no_special;
|
||||
ominimum_image_check(no_special, delx, dely, delz);
|
||||
if (no_special)
|
||||
neighptr[u] = -j - 1;
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j < nlocal) {
|
||||
if (j < vlmin) vlmin = j;
|
||||
if (j > vlmax) vlmax = j;
|
||||
} else {
|
||||
if (j < vgmin) vgmin = j;
|
||||
if (j > vgmax) vgmax = j;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
int n = 0, n2 = maxnbors;
|
||||
for (int u = maxnbors; u < raw_count; u++) {
|
||||
const int j = neighptr[u];
|
||||
int pj = j;
|
||||
if (pj < e_nall) {
|
||||
if (need_ic)
|
||||
if (pj < 0) pj = -pj - 1;
|
||||
|
||||
if (pj < nlocal)
|
||||
neighptr[n++] = j;
|
||||
else
|
||||
neighptr[n2++] = j;
|
||||
}
|
||||
}
|
||||
int ns = n;
|
||||
for (int u = maxnbors; u < n2; u++)
|
||||
neighptr[n++] = neighptr[u];
|
||||
|
||||
ilist[i] = i;
|
||||
cnumneigh[i] = ct;
|
||||
ns += n2 - maxnbors;
|
||||
|
||||
int edge = (ns % pad_width);
|
||||
if (edge) {
|
||||
const int pad_end = ns + (pad_width - edge);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma loop_count min=1, max=15, avg=8
|
||||
#endif
|
||||
for ( ; ns < pad_end; ns++)
|
||||
neighptr[ns] = e_nall;
|
||||
}
|
||||
numneigh[i] = ns;
|
||||
|
||||
ct += ns;
|
||||
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
|
||||
edge = (ct % alignb);
|
||||
if (edge) ct += alignb - edge;
|
||||
neighptr = firstneigh + ct;
|
||||
if (ct + obound > list_size) {
|
||||
if (i < ito - 1) {
|
||||
*overflow = 1;
|
||||
ct = (ifrom + tid * 2) * maxnbors;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (*overflow == 1)
|
||||
for (int i = ifrom; i < ito; i++)
|
||||
numneigh[i] = 0;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (separate_buffers) {
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp critical
|
||||
#endif
|
||||
{
|
||||
if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin;
|
||||
if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax;
|
||||
if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin;
|
||||
if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax;
|
||||
}
|
||||
#pragma omp barrier
|
||||
}
|
||||
|
||||
int ghost_offset = 0, nall_offset = e_nall;
|
||||
if (separate_buffers) {
|
||||
int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
|
||||
if (nghost < 0) nghost = 0;
|
||||
if (offload) {
|
||||
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
|
||||
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
|
||||
} else {
|
||||
ghost_offset = overflow[LMP_GHOST_MIN] - nlocal;
|
||||
nall_offset = nlocal + nghost;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (molecular) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj];
|
||||
if (need_ic && j < 0) {
|
||||
which = 0;
|
||||
jlist[jj] = -j - 1;
|
||||
} else
|
||||
ofind_special(which, special, nspecial, i, tag[j]);
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j >= nlocal) {
|
||||
if (j == e_nall)
|
||||
jlist[jj] = nall_offset;
|
||||
else if (which)
|
||||
jlist[jj] = (j-ghost_offset) ^ (which << SBBITS);
|
||||
else jlist[jj]-=ghost_offset;
|
||||
} else
|
||||
#endif
|
||||
if (which) jlist[jj] = j ^ (which << SBBITS);
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
int jj = 0;
|
||||
for (jj = 0; jj < jnum; jj++)
|
||||
if (jlist[jj] >= nlocal) break;
|
||||
while (jj < jnum) {
|
||||
if (jlist[jj] == e_nall) jlist[jj] = nall_offset;
|
||||
else jlist[jj] -= ghost_offset;
|
||||
jj++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} // end omp
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime() - *timer_compute;
|
||||
#endif
|
||||
} // end offload
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload) {
|
||||
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
|
||||
_fix->start_watch(TIME_HOST_NEIGHBOR);
|
||||
for (int n = 0; n < aend; n++) {
|
||||
ilist[n] = n;
|
||||
numneigh[n] = 0;
|
||||
}
|
||||
} else {
|
||||
for (int i = astart; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
if (separate_buffers) {
|
||||
_fix->start_watch(TIME_PACK);
|
||||
_fix->set_neighbor_host_sizes();
|
||||
buffers->pack_sep_from_single(_fix->host_min_local(),
|
||||
_fix->host_used_local(),
|
||||
_fix->host_min_ghost(),
|
||||
_fix->host_used_ghost());
|
||||
_fix->stop_watch(TIME_PACK);
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (int i = astart; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
#endif
|
||||
}
|
||||
51
src/USER-INTEL/npair_half_bin_newton_tri_intel.h
Normal file
51
src/USER-INTEL/npair_half_bin_newton_tri_intel.h
Normal file
@ -0,0 +1,51 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
NPairStyle(half/bin/newton/tri/intel,
|
||||
NPairHalfBinNewtonTriIntel,
|
||||
NP_HALF | NP_BIN | NP_NEWTON | NP_TRI | NP_INTEL)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALF_BIN_NEWTON_INTEL_TRI_H
|
||||
#define LMP_NPAIR_HALF_BIN_NEWTON_INTEL_TRI_H
|
||||
|
||||
#include "npair_intel.h"
|
||||
#include "fix_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairHalfBinNewtonTriIntel : public NPairIntel {
|
||||
public:
|
||||
NPairHalfBinNewtonTriIntel(class LAMMPS *);
|
||||
~NPairHalfBinNewtonTriIntel() {}
|
||||
void build(class NeighList *);
|
||||
|
||||
private:
|
||||
template <class flt_t, class acc_t>
|
||||
void hbnti(NeighList *, IntelBuffers<flt_t,acc_t> *);
|
||||
template <class flt_t, class acc_t, int, int>
|
||||
void hbnti(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, const int,
|
||||
const int, const int offload_end = 0);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
69
src/USER-INTEL/npair_intel.cpp
Normal file
69
src/USER-INTEL/npair_intel.cpp
Normal file
@ -0,0 +1,69 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_intel.h"
|
||||
#include "nstencil.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairIntel::NPairIntel(LAMMPS *lmp) : NPair(lmp) {
  // locate the fix with id "package_intel"; without it no /intel style can run
  const int fix_index = modify->find_fix("package_intel");
  if (fix_index < 0) {
    error->all(FLERR,
               "The 'package intel' command is required for /intel styles");
  }
  _fix = static_cast<FixIntel *>(modify->fix[fix_index]);

  #ifdef _LMP_INTEL_OFFLOAD
  // nothing has been mapped to the coprocessor yet
  _off_map_stencil = 0;
  _cop = _fix->coprocessor_number();
  #endif
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairIntel::~NPairIntel() {
  #ifdef _LMP_INTEL_OFFLOAD
  // Release the coprocessor copy of the stencil, if one was ever mapped.
  // The buffer must be addressed through _off_map_stencil, i.e. the host
  // pointer recorded by grow_stencil() when the buffer was allocated;
  // this->stencil may have been re-pointed since the last mapping and would
  // then name an address that was never transferred.
  if (_off_map_stencil) {
    const int * stencil = _off_map_stencil;
    #pragma offload_transfer target(mic:_cop) \
      nocopy(stencil:alloc_if(0) free_if(1))
  }
  #endif
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy needed info from NStencil class to this build class
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
void NPairIntel::grow_stencil()
|
||||
{
|
||||
if (_off_map_stencil != stencil) {
|
||||
if (_off_map_stencil) {
|
||||
const int * stencil = _off_map_stencil;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(stencil:alloc_if(0) free_if(1))
|
||||
}
|
||||
_off_map_stencil = stencil;
|
||||
const int * stencil = _off_map_stencil;
|
||||
const int maxstencil = ns->get_maxstencil();
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(stencil:length(maxstencil) alloc_if(1) free_if(0))
|
||||
}
|
||||
}
|
||||
#endif
|
||||
117
src/USER-INTEL/npair_intel.h
Normal file
117
src/USER-INTEL/npair_intel.h
Normal file
@ -0,0 +1,117 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_NPAIR_INTEL_H
|
||||
#define LMP_NPAIR_INTEL_H
|
||||
|
||||
#include "npair.h"
|
||||
#include "fix_intel.h"
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#ifdef LMP_USE_AVXCD
|
||||
#include "intel_simd.h"
|
||||
#endif
|
||||
|
||||
#ifdef OUTER_CHUNK
|
||||
#include "intel_simd.h"
|
||||
#endif
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
#pragma offload_attribute(push,target(mic))
|
||||
#endif
|
||||
|
||||
// Classify neighbor `tag` against the special list of atom `i`.
//   which    : lvalue receiving 0 (not special), 1, 2 or 3
//   special  : flat array of special tags, maxspecial entries per atom
//   nspecial : flat array with three cumulative counts per atom
//   i        : atom index (may be an arbitrary expression)
//   tag      : tag value to look for (may be an arbitrary expression)
// `maxspecial` and the type `tagint` are taken from the enclosing scope.
// The scan does not stop at the first hit, so if a tag occurs more than
// once the last occurrence decides the result.
// All macro parameters are parenthesized so that expression arguments
// (e.g. `base + 1` or `a ? b : c`) keep their intended precedence.
#define ofind_special(which, special, nspecial, i, tag)                     \
{                                                                           \
  (which) = 0;                                                              \
  const int n1 = (nspecial)[(i) * 3];                                       \
  const int n2 = (nspecial)[(i) * 3 + 1];                                   \
  const int n3 = (nspecial)[(i) * 3 + 2];                                   \
  const tagint *sptr = (special) + (i) * maxspecial;                        \
  for (int s = 0; s < n3; s++) {                                            \
    if (sptr[s] == (tag)) {                                                 \
      if (s < n1) {                                                         \
        (which) = 1;                                                        \
      } else if (s < n2) {                                                  \
        (which) = 2;                                                        \
      } else {                                                              \
        (which) = 3;                                                        \
      }                                                                     \
    }                                                                       \
  }                                                                         \
}
|
||||
|
||||
// Set `answer` to 1 if, along any periodic axis, the magnitude of the
// displacement component exceeds half the box length on that axis;
// otherwise set it to 0.  Reads xperiodic/yperiodic/zperiodic and
// xprd_half/yprd_half/zprd_half from the enclosing scope.
#define ominimum_image_check(answer, dx, dy, dz)                    \
{                                                                   \
  answer = ((xperiodic && fabs(dx) > xprd_half) ||                  \
            (yperiodic && fabs(dy) > yprd_half) ||                  \
            (zperiodic && fabs(dz) > zprd_half)) ? 1 : 0;           \
}
|
||||
|
||||
// Same test as ominimum_image_check, but the periodicity flags and half
// box lengths are read through the `domain` pointer of the enclosing scope.
// `answer` becomes 1 if any periodic axis has |component| > half length,
// else 0.
#define dminimum_image_check(answer, dx, dy, dz)                            \
{                                                                           \
  answer = ((domain->xperiodic && fabs(dx) > domain->xprd_half) ||          \
            (domain->yperiodic && fabs(dy) > domain->yprd_half) ||          \
            (domain->zperiodic && fabs(dz) > domain->zprd_half)) ? 1 : 0;   \
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
#pragma offload_attribute(pop)
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairIntel : public NPair {
|
||||
public:
|
||||
NPairIntel(class LAMMPS *);
|
||||
~NPairIntel();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
void grow_stencil();
|
||||
#endif
|
||||
|
||||
protected:
|
||||
FixIntel *_fix;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int _cop;
|
||||
int *_off_map_stencil;
|
||||
#endif
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Exclusion lists not yet supported for Intel offload
|
||||
|
||||
Self explanatory.
|
||||
|
||||
E: The 'package intel' command is required for /intel styles
|
||||
|
||||
Self explanatory.
|
||||
|
||||
E: Too many neighbor bins for USER-INTEL package.
|
||||
|
||||
The number of bins used in the stencil to check for neighboring atoms is too
|
||||
high for the Intel package. Either increase the bin size in the input script
|
||||
or recompile with a larger setting for INTEL_MAX_STENCIL in intel_preprocess.h.
|
||||
|
||||
*/
|
||||
|
||||
@ -1,621 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "neighbor.h"
|
||||
#include "neighbor_omp.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "group.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
N^2 search for all neighbors
|
||||
every neighbor pair appears in list of both atoms i and j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::full_nsq_omp(NeighList *list)
|
||||
{
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
|
||||
const int molecular = atom->molecular;
|
||||
const int moltemplate = (molecular == 2) ? 1 : 0;
|
||||
|
||||
NEIGH_OMP_INIT;
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(list)
|
||||
#endif
|
||||
NEIGH_OMP_SETUP(nlocal);
|
||||
|
||||
int i,j,n,itype,jtype,which,imol,iatom;
|
||||
tagint tagprev;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
|
||||
int *neighptr;
|
||||
|
||||
double **x = atom->x;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
tagint *tag = atom->tag;
|
||||
tagint *molecule = atom->molecule;
|
||||
tagint **special = atom->special;
|
||||
int **nspecial = atom->nspecial;
|
||||
|
||||
int nall = atom->nlocal + atom->nghost;
|
||||
int *molindex = atom->molindex;
|
||||
int *molatom = atom->molatom;
|
||||
Molecule **onemols = atom->avec->onemols;
|
||||
|
||||
int *ilist = list->ilist;
|
||||
int *numneigh = list->numneigh;
|
||||
int **firstneigh = list->firstneigh;
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over owned atoms, storing neighbors
|
||||
|
||||
for (i = ifrom; i < ito; i++) {
|
||||
|
||||
n = 0;
|
||||
neighptr = ipage.vget();
|
||||
|
||||
itype = type[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
if (moltemplate) {
|
||||
imol = molindex[i];
|
||||
iatom = molatom[i];
|
||||
tagprev = tag[i] - iatom - 1;
|
||||
}
|
||||
|
||||
// loop over all atoms, owned and ghost
|
||||
// skip i = j
|
||||
|
||||
for (j = 0; j < nall; j++) {
|
||||
if (includegroup && !(mask[j] & bitmask)) continue;
|
||||
if (i == j) continue;
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >=0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if (domain->minimum_image_check(delx,dely,delz))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
}
|
||||
}
|
||||
|
||||
ilist[i] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
NEIGH_OMP_CLOSE;
|
||||
list->inum = nlocal;
|
||||
list->gnum = 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
N^2 search for all neighbors
|
||||
include neighbors of ghost atoms, but no "special neighbors" for ghosts
|
||||
every neighbor pair appears in list of both atoms i and j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::full_nsq_ghost_omp(NeighList *list)
|
||||
{
|
||||
const int nlocal = atom->nlocal;
|
||||
const int nall = nlocal + atom->nghost;
|
||||
const int molecular = atom->molecular;
|
||||
const int moltemplate = (molecular == 2) ? 1 : 0;
|
||||
|
||||
NEIGH_OMP_INIT;
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(list)
|
||||
#endif
|
||||
NEIGH_OMP_SETUP(nall);
|
||||
|
||||
int i,j,n,itype,jtype,which,imol,iatom;
|
||||
tagint tagprev;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
|
||||
int *neighptr;
|
||||
|
||||
double **x = atom->x;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
tagint *tag = atom->tag;
|
||||
tagint *molecule = atom->molecule;
|
||||
tagint **special = atom->special;
|
||||
int **nspecial = atom->nspecial;
|
||||
|
||||
int *molindex = atom->molindex;
|
||||
int *molatom = atom->molatom;
|
||||
Molecule **onemols = atom->avec->onemols;
|
||||
|
||||
int *ilist = list->ilist;
|
||||
int *numneigh = list->numneigh;
|
||||
int **firstneigh = list->firstneigh;
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over owned & ghost atoms, storing neighbors
|
||||
|
||||
for (i = ifrom; i < ito; i++) {
|
||||
|
||||
n = 0;
|
||||
neighptr = ipage.vget();
|
||||
|
||||
itype = type[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
if (moltemplate) {
|
||||
imol = molindex[i];
|
||||
iatom = molatom[i];
|
||||
tagprev = tag[i] - iatom - 1;
|
||||
}
|
||||
|
||||
// loop over all atoms, owned and ghost
|
||||
// skip i = j
|
||||
// no molecular test when i = ghost atom
|
||||
|
||||
if (i < nlocal) {
|
||||
for (j = 0; j < nall; j++) {
|
||||
if (i == j) continue;
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >=0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if (domain->minimum_image_check(delx,dely,delz))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < nall; j++) {
|
||||
if (i == j) continue;
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
|
||||
}
|
||||
}
|
||||
|
||||
ilist[i] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
NEIGH_OMP_CLOSE;
|
||||
list->inum = nlocal;
|
||||
list->gnum = nall - nlocal;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction for all neighbors
|
||||
every neighbor pair appears in list of both atoms i and j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::full_bin_omp(NeighList *list)
|
||||
{
|
||||
// bin owned & ghost atoms
|
||||
|
||||
if (binatomflag) bin_atoms();
|
||||
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
const int molecular = atom->molecular;
|
||||
const int moltemplate = (molecular == 2) ? 1 : 0;
|
||||
|
||||
NEIGH_OMP_INIT;
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(list)
|
||||
#endif
|
||||
NEIGH_OMP_SETUP(nlocal);
|
||||
|
||||
int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
|
||||
tagint tagprev;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
|
||||
int *neighptr;
|
||||
|
||||
double **x = atom->x;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
tagint *tag = atom->tag;
|
||||
tagint *molecule = atom->molecule;
|
||||
tagint **special = atom->special;
|
||||
int **nspecial = atom->nspecial;
|
||||
int *molindex = atom->molindex;
|
||||
int *molatom = atom->molatom;
|
||||
Molecule **onemols = atom->avec->onemols;
|
||||
|
||||
int *ilist = list->ilist;
|
||||
int *numneigh = list->numneigh;
|
||||
int **firstneigh = list->firstneigh;
|
||||
int nstencil = list->nstencil;
|
||||
int *stencil = list->stencil;
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over owned atoms, storing neighbors
|
||||
|
||||
for (i = ifrom; i < ito; i++) {
|
||||
|
||||
n = 0;
|
||||
neighptr = ipage.vget();
|
||||
|
||||
itype = type[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
if (moltemplate) {
|
||||
imol = molindex[i];
|
||||
iatom = molatom[i];
|
||||
tagprev = tag[i] - iatom - 1;
|
||||
}
|
||||
|
||||
// loop over all atoms in surrounding bins in stencil including self
|
||||
// skip i = j
|
||||
|
||||
ibin = coord2bin(x[i]);
|
||||
|
||||
for (k = 0; k < nstencil; k++) {
|
||||
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
|
||||
if (i == j) continue;
|
||||
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >=0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if (domain->minimum_image_check(delx,dely,delz))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ilist[i] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
NEIGH_OMP_CLOSE;
|
||||
list->inum = nlocal;
|
||||
list->gnum = 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction for all neighbors
|
||||
include neighbors of ghost atoms, but no "special neighbors" for ghosts
|
||||
every neighbor pair appears in list of both atoms i and j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::full_bin_ghost_omp(NeighList *list)
{
  // threaded binned build that stores, for every owned and ghost atom i,
  // each in-range atom j != i found in the stencil bins around i
  // special-neighbor encoding is applied only when i is an owned atom

  // (re)bin owned & ghost atoms when requested

  if (binatomflag) bin_atoms();

  const int nlocal = atom->nlocal;
  const int nall = nlocal + atom->nghost;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nall);

  // per-atom arrays

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  // molecule template data

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  // output arrays and stencil of this list

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int nstencil = list->nstencil;
  int *stencil = list->stencil;
  int **stencilxyz = list->stencilxyz;

  // each thread draws neighbor storage from its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  // this thread's slice of owned & ghost atoms

  for (int i = ifrom; i < ito; ++i) {

    int nneigh = 0;
    int *nbrs = ipage.vget();

    const int itype = type[i];
    const double xi = x[i][0];
    const double yi = x[i][1];
    const double zi = x[i][2];

    // template bookkeeping, only read back on the owned-atom path below

    int imol = 0, iatom = 0;
    tagint tagprev = 0;
    if (moltemplate) {
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    if (i < nlocal) {

      // owned atom: scan every stencil bin, skip self,
      // apply exclusions and the special-neighbor test

      const int ibin = coord2bin(x[i]);

      for (int k = 0; k < nstencil; ++k) {
        for (int j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
          if (j == i) continue;

          const int jtype = type[j];
          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

          const double dx = xi - x[j][0];
          const double dy = yi - x[j][1];
          const double dz = zi - x[j][2];
          const double rsq = dx*dx + dy*dy + dz*dz;

          if (!(rsq <= cutneighsq[itype][jtype])) continue;

          if (!molecular) {
            nbrs[nneigh++] = j;
            continue;
          }

          int which = 0;
          if (!moltemplate)
            which = find_special(special[i],nspecial[i],tag[j]);
          else if (imol >= 0)
            which = find_special(onemols[imol]->special[iatom],
                                 onemols[imol]->nspecial[iatom],
                                 tag[j]-tagprev);

          // which == 0 or a positive minimum-image check: store plain index
          // which > 0 otherwise: fold the special flag into the upper bits
          // which < 0 otherwise: pair is not stored

          if (which == 0 || domain->minimum_image_check(dx,dy,dz))
            nbrs[nneigh++] = j;
          else if (which > 0)
            nbrs[nneigh++] = j ^ (which << SBBITS);
        }
      }

    } else {

      // ghost atom: stencil bins outside the bin grid are skipped,
      // the ghost cutoff is used and no molecular test is done

      int xbin,ybin,zbin;
      const int ibin = coord2bin(x[i],xbin,ybin,zbin);

      for (int k = 0; k < nstencil; ++k) {
        const int xb = xbin + stencilxyz[k][0];
        if (xb < 0 || xb >= mbinx) continue;
        const int yb = ybin + stencilxyz[k][1];
        if (yb < 0 || yb >= mbiny) continue;
        const int zb = zbin + stencilxyz[k][2];
        if (zb < 0 || zb >= mbinz) continue;

        for (int j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
          if (j == i) continue;

          const int jtype = type[j];
          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

          const double dx = xi - x[j][0];
          const double dy = yi - x[j][1];
          const double dz = zi - x[j][2];
          const double rsq = dx*dx + dy*dy + dz*dz;

          if (rsq <= cutneighghostsq[itype][jtype]) nbrs[nneigh++] = j;
        }
      }
    }

    // publish the finished neighbor chunk for atom i

    ilist[i] = i;
    firstneigh[i] = nbrs;
    numneigh[i] = nneigh;
    ipage.vgot(nneigh);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;
  list->inum = nlocal;
  list->gnum = nall - nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction for all neighbors
|
||||
multi-type stencil is itype dependent and is distance checked
|
||||
every neighbor pair appears in list of both atoms i and j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::full_multi_omp(NeighList *list)
{
  // threaded binned build with type-dependent (multi) stencils
  // for each atom i in [0,nlocal) every in-range atom j != i is stored
  // a stencil bin is ignored for j when the i,j cutoff cannot reach it

  // (re)bin local & ghost atoms when requested

  if (binatomflag) bin_atoms();

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nlocal);

  // per-atom arrays

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  // molecule template data

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  // output arrays and per-type stencils of this list

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int *nstencil_multi = list->nstencil_multi;
  int **stencil_multi = list->stencil_multi;
  double **distsq_multi = list->distsq_multi;

  // each thread draws neighbor storage from its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  for (int i = ifrom; i < ito; ++i) {

    int nneigh = 0;
    int *nbrs = ipage.vget();

    const int itype = type[i];
    const double xi = x[i][0];
    const double yi = x[i][1];
    const double zi = x[i][2];

    int imol = 0, iatom = 0;
    tagint tagprev = 0;
    if (moltemplate) {
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // stencil, bin distances and cutoffs selected by the type of atom i

    const int ibin = coord2bin(x[i]);
    const int *sbins = stencil_multi[itype];
    const double *bindistsq = distsq_multi[itype];
    const double *cutsq_i = cutneighsq[itype];
    const int ns = nstencil_multi[itype];

    for (int k = 0; k < ns; ++k) {
      for (int j = binhead[ibin+sbins[k]]; j >= 0; j = bins[j]) {
        const int jtype = type[j];

        // skip if the i,j cutoff is shorter than the bin distance, or i = j

        if (cutsq_i[jtype] < bindistsq[k] || j == i) continue;

        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

        const double dx = xi - x[j][0];
        const double dy = yi - x[j][1];
        const double dz = zi - x[j][2];
        const double rsq = dx*dx + dy*dy + dz*dz;

        if (!(rsq <= cutsq_i[jtype])) continue;

        if (!molecular) {
          nbrs[nneigh++] = j;
          continue;
        }

        int which = 0;
        if (!moltemplate)
          which = find_special(special[i],nspecial[i],tag[j]);
        else if (imol >= 0)
          which = find_special(onemols[imol]->special[iatom],
                               onemols[imol]->nspecial[iatom],
                               tag[j]-tagprev);

        // plain index, flag-encoded index, or nothing (which < 0)

        if (which == 0 || domain->minimum_image_check(dx,dy,dz))
          nbrs[nneigh++] = j;
        else if (which > 0)
          nbrs[nneigh++] = j ^ (which << SBBITS);
      }
    }

    // publish the finished neighbor chunk for atom i

    ilist[i] = i;
    firstneigh[i] = nbrs;
    numneigh[i] = nneigh;
    ipage.vgot(nneigh);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;
  list->inum = nlocal;
  list->gnum = 0;
}
|
||||
@ -1,618 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <string.h>
|
||||
#include "neighbor.h"
|
||||
#include "neighbor_omp.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "group.h"
|
||||
#include "fix_shear_history.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
granular particles
|
||||
N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
|
||||
shear history must be accounted for when a neighbor pair is added
|
||||
pair added to list if atoms i and j are both owned and i < j
|
||||
pair added if j is ghost (also stored by proc owning j)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::granular_nsq_no_newton_omp(NeighList *list)
{
  // threaded N^2/2 search for granular pairs: atom i is paired with
  // every j > i (owned or ghost) closer than radius_i + radius_j + skin
  // when a shear-history fix is attached, a touch flag and dnum shear
  // values are stored alongside every neighbor

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;

  FixShearHistory * const fix_history = list->fix_history;
  NeighList * listgranhistory = list->listgranhistory;

  NEIGH_OMP_INIT;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list,listgranhistory)
#endif
  NEIGH_OMP_SETUP(nlocal);

  // per-atom arrays

  double **x = atom->x;
  double *radius = atom->radius;
  tagint *tag = atom->tag;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *molecule = atom->molecule;
  int nall = atom->nlocal + atom->nghost;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;

  // each thread has its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  // shear-history storage, set up only if the fix is present

  int *npartner = NULL;
  tagint **partner = NULL;
  double **shearpartner = NULL;
  int **firsttouch = NULL;
  double **firstshear = NULL;
  MyPage<int> *ipage_touch = NULL;
  MyPage<double> *dpage_shear = NULL;
  int dnum = 0;
  int dnumbytes = 0;

  if (fix_history) {
    npartner = fix_history->npartner;
    partner = fix_history->partner;
    shearpartner = fix_history->shearpartner;
    firsttouch = listgranhistory->firstneigh;
    firstshear = listgranhistory->firstdouble;
    ipage_touch = listgranhistory->ipage+tid;
    dpage_shear = listgranhistory->dpage+tid;
    dnum = listgranhistory->dnum;
    dnumbytes = dnum * sizeof(double);
    ipage_touch->reset();
    dpage_shear->reset();
  }

  for (int i = ifrom; i < ito; ++i) {

    int nneigh = 0;
    int nshear = 0;
    int *nbrs = ipage.vget();
    int *touchptr = NULL;
    double *shearptr = NULL;
    if (fix_history) {
      touchptr = ipage_touch->vget();
      shearptr = dpage_shear->vget();
    }

    const double xi = x[i][0];
    const double yi = x[i][1];
    const double zi = x[i][2];
    const double radi = radius[i];

    // loop over remaining atoms, owned and ghost

    for (int j = i+1; j < nall; ++j) {
      if (includegroup && !(mask[j] & bitmask)) continue;
      if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;

      const double dx = xi - x[j][0];
      const double dy = yi - x[j][1];
      const double dz = zi - x[j][2];
      const double rsq = dx*dx + dy*dy + dz*dz;
      const double radsum = radi + radius[j];
      const double cutsq = (radsum+skin) * (radsum+skin);

      if (!(rsq <= cutsq)) continue;

      nbrs[nneigh] = j;

      if (fix_history) {

        // default: not touching, zeroed shear values
        // overlapping pair with a stored partner entry: copy its history

        int touching = 0;
        const double *src = zeroes;
        if (rsq < radsum*radsum) {
          const int np = npartner[i];
          int m = 0;
          while (m < np && partner[i][m] != tag[j]) ++m;
          if (m < np) {
            touching = 1;
            src = &shearpartner[i][dnum*m];
          }
        }
        touchptr[nneigh] = touching;
        memcpy(&shearptr[nshear],src,dnumbytes);
        nshear += dnum;
      }

      nneigh++;
    }

    // publish the finished neighbor chunk for atom i

    ilist[i] = i;
    firstneigh[i] = nbrs;
    numneigh[i] = nneigh;
    ipage.vgot(nneigh);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");

    if (fix_history) {
      firsttouch[i] = touchptr;
      firstshear[i] = shearptr;
      ipage_touch->vgot(nneigh);
      dpage_shear->vgot(nshear);
    }
  }
  NEIGH_OMP_CLOSE;
  list->inum = nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
granular particles
|
||||
N^2 / 2 search for neighbor pairs with full Newton's 3rd law
|
||||
no shear history is allowed for this option
|
||||
pair added to list if atoms i and j are both owned and i < j
|
||||
if j is ghost only me or other proc adds pair
|
||||
decision based on itag,jtag tests
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::granular_nsq_newton_omp(NeighList *list)
{
  // threaded N^2/2 search for granular pairs with full Newton's 3rd law
  // atom i is paired with j > i closer than radius_i + radius_j + skin
  // for a ghost j the pair is kept or dropped based on the atom tags,
  // with a coordinate comparison as tie-break when the tags are equal

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nlocal);

  int i,j,n;
  // tags must be held as tagint: storing them in a plain int would
  // truncate the IDs whenever tagint is wider than int and corrupt
  // the ordering/parity decision below
  tagint itag,jtag;
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  double radi,radsum,cutsq;
  int *neighptr;

  double **x = atom->x;
  double *radius = atom->radius;
  tagint *tag = atom->tag;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *molecule = atom->molecule;
  int nall = atom->nlocal + atom->nghost;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;

  // each thread has its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  for (i = ifrom; i < ito; i++) {

    n = 0;
    neighptr = ipage.vget();

    itag = tag[i];
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
    radi = radius[i];

    // loop over remaining atoms, owned and ghost

    for (j = i+1; j < nall; j++) {
      if (includegroup && !(mask[j] & bitmask)) continue;

      // j beyond the owned range: decide via tags whether to keep the pair
      //   itag > jtag: keep only if the tag sum is odd
      //   itag < jtag: keep only if the tag sum is even
      //   equal tags:  keep only if j is not "below" i in z, then y, then x

      if (j >= nlocal) {
        jtag = tag[j];
        if (itag > jtag) {
          if ((itag+jtag) % 2 == 0) continue;
        } else if (itag < jtag) {
          if ((itag+jtag) % 2 == 1) continue;
        } else {
          if (x[j][2] < ztmp) continue;
          if (x[j][2] == ztmp) {
            if (x[j][1] < ytmp) continue;
            if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
          }
        }
      }

      if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;

      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
      rsq = delx*delx + dely*dely + delz*delz;
      radsum = radi + radius[j];
      cutsq = (radsum+skin) * (radsum+skin);

      if (rsq <= cutsq) neighptr[n++] = j;
    }

    // publish the finished neighbor chunk for atom i

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;
  list->inum = nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
granular particles
|
||||
binned neighbor list construction with partial Newton's 3rd law
|
||||
shear history must be accounted for when a neighbor pair is added
|
||||
each owned atom i checks own bin and surrounding bins in non-Newton stencil
|
||||
pair stored once if i,j are both owned and i < j
|
||||
pair stored by me if j is ghost (also stored by proc owning j)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::granular_bin_no_newton_omp(NeighList *list)
{
  // threaded binned search for granular pairs: atom i is paired with
  // every j > i in its stencil bins closer than radius_i + radius_j + skin
  // when a shear-history fix is attached, a touch flag and dnum shear
  // values are stored alongside every neighbor

  // (re)bin local & ghost atoms when requested

  if (binatomflag) bin_atoms();

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;

  FixShearHistory * const fix_history = list->fix_history;
  NeighList * listgranhistory = list->listgranhistory;

  NEIGH_OMP_INIT;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list,listgranhistory)
#endif
  NEIGH_OMP_SETUP(nlocal);

  // per-atom arrays

  double **x = atom->x;
  double *radius = atom->radius;
  tagint *tag = atom->tag;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *molecule = atom->molecule;

  // output arrays and stencil of this list

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int nstencil = list->nstencil;
  int *stencil = list->stencil;

  // each thread has its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  // shear-history storage, set up only if the fix is present

  int *npartner = NULL;
  tagint **partner = NULL;
  double **shearpartner = NULL;
  int **firsttouch = NULL;
  double **firstshear = NULL;
  MyPage<int> *ipage_touch = NULL;
  MyPage<double> *dpage_shear = NULL;
  int dnum = 0;
  int dnumbytes = 0;

  if (fix_history) {
    npartner = fix_history->npartner;
    partner = fix_history->partner;
    shearpartner = fix_history->shearpartner;
    firsttouch = listgranhistory->firstneigh;
    firstshear = listgranhistory->firstdouble;
    ipage_touch = listgranhistory->ipage+tid;
    dpage_shear = listgranhistory->dpage+tid;
    dnum = listgranhistory->dnum;
    dnumbytes = dnum * sizeof(double);
    ipage_touch->reset();
    dpage_shear->reset();
  }

  for (int i = ifrom; i < ito; ++i) {

    int nneigh = 0;
    int nshear = 0;
    int *nbrs = ipage.vget();
    int *touchptr = NULL;
    double *shearptr = NULL;
    if (fix_history) {
      touchptr = ipage_touch->vget();
      shearptr = dpage_shear->vget();
    }

    const double xi = x[i][0];
    const double yi = x[i][1];
    const double zi = x[i][2];
    const double radi = radius[i];
    const int ibin = coord2bin(x[i]);

    // scan all stencil bins including i's own bin
    // keeping only j > i stores own/own pairs once,
    // own/ghost pairs are stored on both procs

    for (int k = 0; k < nstencil; ++k) {
      for (int j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
        if (j <= i) continue;
        if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;

        const double dx = xi - x[j][0];
        const double dy = yi - x[j][1];
        const double dz = zi - x[j][2];
        const double rsq = dx*dx + dy*dy + dz*dz;
        const double radsum = radi + radius[j];
        const double cutsq = (radsum+skin) * (radsum+skin);

        if (!(rsq <= cutsq)) continue;

        nbrs[nneigh] = j;

        if (fix_history) {

          // default: not touching, zeroed shear values
          // overlapping pair with a stored partner entry: copy its history

          int touching = 0;
          const double *src = zeroes;
          if (rsq < radsum*radsum) {
            const int np = npartner[i];
            int m = 0;
            while (m < np && partner[i][m] != tag[j]) ++m;
            if (m < np) {
              touching = 1;
              src = &shearpartner[i][dnum*m];
            }
          }
          touchptr[nneigh] = touching;
          memcpy(&shearptr[nshear],src,dnumbytes);
          nshear += dnum;
        }

        nneigh++;
      }
    }

    // publish the finished neighbor chunk for atom i

    ilist[i] = i;
    firstneigh[i] = nbrs;
    numneigh[i] = nneigh;
    ipage.vgot(nneigh);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");

    if (fix_history) {
      firsttouch[i] = touchptr;
      firstshear[i] = shearptr;
      ipage_touch->vgot(nneigh);
      dpage_shear->vgot(nshear);
    }
  }
  NEIGH_OMP_CLOSE;
  list->inum = nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
granular particles
|
||||
binned neighbor list construction with full Newton's 3rd law
|
||||
no shear history is allowed for this option
|
||||
each owned atom i checks its own bin and other bins in Newton stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::granular_bin_newton_omp(NeighList *list)
{
  // threaded binned search for granular pairs with full Newton's 3rd law
  // atom i first walks the remainder of its own bin's linked list,
  // then every bin of the stencil; cutoff is radius_i + radius_j + skin

  // (re)bin local & ghost atoms when requested

  if (binatomflag) bin_atoms();

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nlocal);

  // per-atom arrays

  double **x = atom->x;
  double *radius = atom->radius;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *molecule = atom->molecule;

  // output arrays and stencil of this list

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int nstencil = list->nstencil;
  int *stencil = list->stencil;

  // each thread has its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  for (int i = ifrom; i < ito; ++i) {

    int nneigh = 0;
    int *nbrs = ipage.vget();

    const double xi = x[i][0];
    const double yi = x[i][1];
    const double zi = x[i][2];
    const double radi = radius[i];

    // rest of i's own bin, starting after i in the linked list
    // a j at or beyond nlocal is kept only if it is not "below" i:
    // lower z, or equal z and lower y, or equal z,y and lower x

    for (int j = bins[i]; j >= 0; j = bins[j]) {
      const double *xj = x[j];

      if (j >= nlocal) {
        if (xj[2] < zi) continue;
        if (xj[2] == zi) {
          if (xj[1] < yi) continue;
          if (xj[1] == yi && xj[0] < xi) continue;
        }
      }

      if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;

      const double dx = xi - xj[0];
      const double dy = yi - xj[1];
      const double dz = zi - xj[2];
      const double rsq = dx*dx + dy*dy + dz*dz;
      const double radsum = radi + radius[j];
      const double cutsq = (radsum+skin) * (radsum+skin);

      if (rsq <= cutsq) nbrs[nneigh++] = j;
    }

    // all atoms in the stencil bins, every in-range pair is stored

    const int ibin = coord2bin(x[i]);

    for (int k = 0; k < nstencil; ++k) {
      for (int j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
        if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;

        const double dx = xi - x[j][0];
        const double dy = yi - x[j][1];
        const double dz = zi - x[j][2];
        const double rsq = dx*dx + dy*dy + dz*dz;
        const double radsum = radi + radius[j];
        const double cutsq = (radsum+skin) * (radsum+skin);

        if (rsq <= cutsq) nbrs[nneigh++] = j;
      }
    }

    // publish the finished neighbor chunk for atom i

    ilist[i] = i;
    firstneigh[i] = nbrs;
    numneigh[i] = nneigh;
    ipage.vgot(nneigh);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;
  list->inum = nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
granular particles
|
||||
binned neighbor list construction with Newton's 3rd law for triclinic
|
||||
no shear history is allowed for this option
|
||||
each owned atom i checks its own bin and other bins in triclinic stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::granular_bin_newton_tri_omp(NeighList *list)
{
  // threaded binned search for granular pairs using the triclinic stencil
  // a pair i,j is kept only when j is not "below" i, so it is stored once
  // cutoff is radius_i + radius_j + skin

  // (re)bin local & ghost atoms when requested

  if (binatomflag) bin_atoms();

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nlocal);

  // per-atom arrays

  double **x = atom->x;
  double *radius = atom->radius;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *molecule = atom->molecule;

  // output arrays and stencil of this list

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int nstencil = list->nstencil;
  int *stencil = list->stencil;

  // each thread has its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  for (int i = ifrom; i < ito; ++i) {

    int nneigh = 0;
    int *nbrs = ipage.vget();

    const double xi = x[i][0];
    const double yi = x[i][1];
    const double zi = x[i][2];
    const double radi = radius[i];

    const int ibin = coord2bin(x[i]);

    for (int k = 0; k < nstencil; ++k) {
      for (int j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
        const double *xj = x[j];

        // drop j when it is "below" i:
        //   lower z, or equal z and lower y, or equal z,y and lower x,
        //   or identical coords and j <= i
        // the last case removes self-pairs but keeps superposed atoms

        if (xj[2] < zi) continue;
        if (xj[2] == zi) {
          if (xj[1] < yi) continue;
          if (xj[1] == yi) {
            if (xj[0] < xi) continue;
            if (xj[0] == xi && j <= i) continue;
          }
        }

        if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;

        const double dx = xi - xj[0];
        const double dy = yi - xj[1];
        const double dz = zi - xj[2];
        const double rsq = dx*dx + dy*dy + dz*dz;
        const double radsum = radi + radius[j];
        const double cutsq = (radsum+skin) * (radsum+skin);

        if (rsq <= cutsq) nbrs[nneigh++] = j;
      }
    }

    // publish the finished neighbor chunk for atom i

    ilist[i] = i;
    firstneigh[i] = nbrs;
    numneigh[i] = nneigh;
    ipage.vgot(nneigh);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;
  list->inum = nlocal;
}
|
||||
@ -1,559 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "neighbor.h"
|
||||
#include "neighbor_omp.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with partial Newton's 3rd law
|
||||
each owned atom i checks own bin and other bins in stencil
|
||||
pair stored once if i,j are both owned and i < j
|
||||
pair stored by me if j is ghost (also stored by proc owning j)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::half_bin_no_newton_omp(NeighList *list)
{
  // threaded binned half-list build with partial Newton's 3rd law
  // atom i stores only neighbors j > i found in its stencil bins,
  // so own/own pairs appear once and own/ghost pairs on both procs

  // (re)bin local & ghost atoms when requested

  if (binatomflag) bin_atoms();

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nlocal);

  // per-atom arrays

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  // molecule template data

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  // output arrays and stencil of this list

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int nstencil = list->nstencil;
  int *stencil = list->stencil;

  // each thread has its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  for (int i = ifrom; i < ito; ++i) {

    int nneigh = 0;
    int *nbrs = ipage.vget();

    const int itype = type[i];
    const double xi = x[i][0];
    const double yi = x[i][1];
    const double zi = x[i][2];

    int imol = 0, iatom = 0;
    tagint tagprev = 0;
    if (moltemplate) {
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // scan all stencil bins including i's own bin

    const int ibin = coord2bin(x[i]);

    for (int k = 0; k < nstencil; ++k) {
      for (int j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
        if (j <= i) continue;

        const int jtype = type[j];
        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

        const double dx = xi - x[j][0];
        const double dy = yi - x[j][1];
        const double dz = zi - x[j][2];
        const double rsq = dx*dx + dy*dy + dz*dz;

        if (!(rsq <= cutneighsq[itype][jtype])) continue;

        if (!molecular) {
          nbrs[nneigh++] = j;
          continue;
        }

        int which = 0;
        if (!moltemplate)
          which = find_special(special[i],nspecial[i],tag[j]);
        else if (imol >= 0)
          which = find_special(onemols[imol]->special[iatom],
                               onemols[imol]->nspecial[iatom],
                               tag[j]-tagprev);

        // plain index, flag-encoded index, or nothing (which < 0)

        if (which == 0 || domain->minimum_image_check(dx,dy,dz))
          nbrs[nneigh++] = j;
        else if (which > 0)
          nbrs[nneigh++] = j ^ (which << SBBITS);
      }
    }

    // publish the finished neighbor chunk for atom i

    ilist[i] = i;
    firstneigh[i] = nbrs;
    numneigh[i] = nneigh;
    ipage.vgot(nneigh);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;
  list->inum = nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with partial Newton's 3rd law
|
||||
include neighbors of ghost atoms, but no "special neighbors" for ghosts
|
||||
owned and ghost atoms check own bin and other bins in stencil
|
||||
pair stored once if i,j are both owned and i < j
|
||||
pair stored by me if i owned and j ghost (also stored by proc owning j)
|
||||
pair stored once if i,j are both ghost and i < j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::half_bin_no_newton_ghost_omp(NeighList *list)
{
  // bin local & ghost atoms when binatomflag is set

  if (binatomflag) bin_atoms();

  const int nlocal = atom->nlocal;
  const int nall = nlocal + atom->nghost;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nall);

  // NOTE(review): tid, ifrom and ito are not declared in this function;
  // presumably NEIGH_OMP_SETUP supplies them - confirm in neighbor_omp.h

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int nstencil = list->nstencil;
  int *stencil = list->stencil;
  int **stencilxyz = list->stencilxyz;

  // every thread allocates neighbors from its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  // this thread's share of atoms: owned atoms first, then ghosts

  for (int i = ifrom; i < ito; i++) {

    int n = 0;
    int *neighptr = ipage.vget();

    const int itype = type[i];
    const double xtmp = x[i][0];
    const double ytmp = x[i][1];
    const double ztmp = x[i][2];

    // template bookkeeping is only read when moltemplate is set

    int imol = 0, iatom = 0;
    tagint tagprev = 0;
    if (moltemplate) {
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // visit all atoms in all stencil bins, own bin included
    // a pair is stored only if i < j, so:
    //   own/own pairs are stored once
    //   own/ghost pairs are stored with the owned atom, on both procs
    //   ghost/ghost pairs are stored once
    // when i is a ghost: stencil bins may be out of bounds and must be
    //   checked, and no molecular (special) test is applied

    if (i < nlocal) {
      const int ibin = coord2bin(x[i]);

      for (int k = 0; k < nstencil; k++) {
        for (int j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
          if (j <= i) continue;

          const int jtype = type[j];
          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

          const double delx = xtmp - x[j][0];
          const double dely = ytmp - x[j][1];
          const double delz = ztmp - x[j][2];
          const double rsq = delx*delx + dely*dely + delz*delz;

          // negated <= so that a NaN distance is skipped as well
          if (!(rsq <= cutneighsq[itype][jtype])) continue;

          if (!molecular) {
            neighptr[n++] = j;
          } else {
            int which = 0;
            if (!moltemplate)
              which = find_special(special[i],nspecial[i],tag[j]);
            else if (imol >= 0)
              which = find_special(onemols[imol]->special[iatom],
                                   onemols[imol]->nspecial[iatom],
                                   tag[j]-tagprev);

            // which < 0 with a failed minimum image check stores nothing
            if (which == 0 || domain->minimum_image_check(delx,dely,delz))
              neighptr[n++] = j;
            else if (which > 0)
              neighptr[n++] = j ^ (which << SBBITS);
          }
        }
      }

    } else {
      int xbin,ybin,zbin;
      const int ibin = coord2bin(x[i],xbin,ybin,zbin);

      for (int k = 0; k < nstencil; k++) {

        // skip stencil bins that fall outside the bin grid

        const int xbin2 = xbin + stencilxyz[k][0];
        if (xbin2 < 0 || xbin2 >= mbinx) continue;
        const int ybin2 = ybin + stencilxyz[k][1];
        if (ybin2 < 0 || ybin2 >= mbiny) continue;
        const int zbin2 = zbin + stencilxyz[k][2];
        if (zbin2 < 0 || zbin2 >= mbinz) continue;

        for (int j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
          if (j <= i) continue;

          const int jtype = type[j];
          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

          const double delx = xtmp - x[j][0];
          const double dely = ytmp - x[j][1];
          const double delz = ztmp - x[j][2];
          const double rsq = delx*delx + dely*dely + delz*delz;

          if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
        }
      }
    }

    // publish the finished neighbor vector of atom i

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;

  list->inum = nlocal;
  list->gnum = nall - atom->nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with full Newton's 3rd law
|
||||
each owned atom i checks its own bin and other bins in Newton stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::half_bin_newton_omp(NeighList *list)
{
  // bin local & ghost atoms when binatomflag is set

  if (binatomflag) bin_atoms();

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nlocal);

  // NOTE(review): tid, ifrom and ito are not declared in this function;
  // presumably NEIGH_OMP_SETUP supplies them - confirm in neighbor_omp.h

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int nstencil = list->nstencil;
  int *stencil = list->stencil;

  // every thread allocates neighbors from its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  // this thread's share of the atoms

  for (int i = ifrom; i < ito; i++) {

    int n = 0;
    int *neighptr = ipage.vget();

    const int itype = type[i];
    const double xtmp = x[i][0];
    const double ytmp = x[i][1];
    const double ztmp = x[i][2];

    // template bookkeeping is only read when moltemplate is set

    int imol = 0, iatom = 0;
    tagint tagprev = 0;
    if (moltemplate) {
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // pass 1: remainder of i's own bin, ghosts sit at the end of the
    //   linked list
    // an owned j is always stored, since it comes after i in the list
    // a ghost j is stored only if its coords are "above and to the right"
    //   of i

    for (int j = bins[i]; j >= 0; j = bins[j]) {
      if (j >= nlocal) {
        const double *xj = x[j];
        if (xj[2] < ztmp) continue;
        if (xj[2] == ztmp) {
          if (xj[1] < ytmp) continue;
          if (xj[1] == ytmp && xj[0] < xtmp) continue;
        }
      }

      const int jtype = type[j];
      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

      const double delx = xtmp - x[j][0];
      const double dely = ytmp - x[j][1];
      const double delz = ztmp - x[j][2];
      const double rsq = delx*delx + dely*dely + delz*delz;

      // negated <= so that a NaN distance is skipped as well
      if (!(rsq <= cutneighsq[itype][jtype])) continue;

      if (!molecular) {
        neighptr[n++] = j;
      } else {
        int which = 0;
        if (!moltemplate)
          which = find_special(special[i],nspecial[i],tag[j]);
        else if (imol >= 0)
          which = find_special(onemols[imol]->special[iatom],
                               onemols[imol]->nspecial[iatom],
                               tag[j]-tagprev);

        // which < 0 with a failed minimum image check stores nothing
        if (which == 0 || domain->minimum_image_check(delx,dely,delz))
          neighptr[n++] = j;
        else if (which > 0)
          neighptr[n++] = j ^ (which << SBBITS);
      }
    }

    // pass 2: all atoms in the other stencil bins, every pair is stored

    const int ibin = coord2bin(x[i]);
    for (int k = 0; k < nstencil; k++) {
      for (int j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
        const int jtype = type[j];
        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

        const double delx = xtmp - x[j][0];
        const double dely = ytmp - x[j][1];
        const double delz = ztmp - x[j][2];
        const double rsq = delx*delx + dely*dely + delz*delz;

        if (!(rsq <= cutneighsq[itype][jtype])) continue;

        if (!molecular) {
          neighptr[n++] = j;
        } else {
          int which = 0;
          if (!moltemplate)
            which = find_special(special[i],nspecial[i],tag[j]);
          else if (imol >= 0)
            which = find_special(onemols[imol]->special[iatom],
                                 onemols[imol]->nspecial[iatom],
                                 tag[j]-tagprev);

          if (which == 0 || domain->minimum_image_check(delx,dely,delz))
            neighptr[n++] = j;
          else if (which > 0)
            neighptr[n++] = j ^ (which << SBBITS);
        }
      }
    }

    // publish the finished neighbor vector of atom i

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;

  list->inum = nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with Newton's 3rd law for triclinic
|
||||
each owned atom i checks its own bin and other bins in triclinic stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::half_bin_newton_tri_omp(NeighList *list)
{
  // bin local & ghost atoms when binatomflag is set

  if (binatomflag) bin_atoms();

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nlocal);

  // NOTE(review): tid, ifrom and ito are not declared in this function;
  // presumably NEIGH_OMP_SETUP supplies them - confirm in neighbor_omp.h

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int nstencil = list->nstencil;
  int *stencil = list->stencil;

  // every thread allocates neighbors from its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  // this thread's share of the atoms

  for (int i = ifrom; i < ito; i++) {

    int n = 0;
    int *neighptr = ipage.vget();

    const int itype = type[i];
    const double xtmp = x[i][0];
    const double ytmp = x[i][1];
    const double ztmp = x[i][2];

    // template bookkeeping is only read when moltemplate is set

    int imol = 0, iatom = 0;
    tagint tagprev = 0;
    if (moltemplate) {
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // visit all atoms in all bins of the stencil
    // atoms j "below" i are rejected, where below means
    //   lower z, or equal z and lower y, or equal z,y and lower x,
    //   or equal z,y,x and j <= i
    // the last rule removes self-self pairs yet keeps superposed atoms

    const int ibin = coord2bin(x[i]);
    for (int k = 0; k < nstencil; k++) {
      for (int j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
        const double *xj = x[j];
        if (xj[2] < ztmp) continue;
        if (xj[2] == ztmp) {
          if (xj[1] < ytmp) continue;
          if (xj[1] == ytmp) {
            if (xj[0] < xtmp) continue;
            if (xj[0] == xtmp && j <= i) continue;
          }
        }

        const int jtype = type[j];
        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

        const double delx = xtmp - xj[0];
        const double dely = ytmp - xj[1];
        const double delz = ztmp - xj[2];
        const double rsq = delx*delx + dely*dely + delz*delz;

        // negated <= so that a NaN distance is skipped as well
        if (!(rsq <= cutneighsq[itype][jtype])) continue;

        if (!molecular) {
          neighptr[n++] = j;
        } else {
          int which = 0;
          if (!moltemplate)
            which = find_special(special[i],nspecial[i],tag[j]);
          else if (imol >= 0)
            which = find_special(onemols[imol]->special[iatom],
                                 onemols[imol]->nspecial[iatom],
                                 tag[j]-tagprev);

          // which < 0 with a failed minimum image check stores nothing
          if (which == 0 || domain->minimum_image_check(delx,dely,delz))
            neighptr[n++] = j;
          else if (which > 0)
            neighptr[n++] = j ^ (which << SBBITS);
        }
      }
    }

    // publish the finished neighbor vector of atom i

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;

  list->inum = nlocal;
}
|
||||
@ -1,435 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "neighbor.h"
|
||||
#include "neighbor_omp.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with partial Newton's 3rd law
|
||||
each owned atom i checks own bin and other bins in stencil
|
||||
multi-type stencil is itype dependent and is distance checked
|
||||
pair stored once if i,j are both owned and i < j
|
||||
pair stored by me if j is ghost (also stored by proc owning j)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::half_multi_no_newton_omp(NeighList *list)
{
  // bin local & ghost atoms when binatomflag is set

  if (binatomflag) bin_atoms();

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nlocal);

  // NOTE(review): tid, ifrom and ito are not declared in this function;
  // presumably NEIGH_OMP_SETUP supplies them - confirm in neighbor_omp.h

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int *nstencil_multi = list->nstencil_multi;
  int **stencil_multi = list->stencil_multi;
  double **distsq_multi = list->distsq_multi;

  // every thread allocates neighbors from its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  // this thread's share of the atoms

  for (int i = ifrom; i < ito; i++) {

    int n = 0;
    int *neighptr = ipage.vget();

    const int itype = type[i];
    const double xtmp = x[i][0];
    const double ytmp = x[i][1];
    const double ztmp = x[i][2];

    // template bookkeeping is only read when moltemplate is set

    int imol = 0, iatom = 0;
    tagint tagprev = 0;
    if (moltemplate) {
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // visit all atoms in all stencil bins, own bin included
    // the stencil, its bin distances and the cutoffs depend on itype
    // a pair is stored only if i < j, so:
    //   own/own pairs are stored once
    //   own/ghost pairs are stored on both procs
    // a bin is skipped for j when the i,j cutoff is below the bin distance

    const int ibin = coord2bin(x[i]);
    const int *istencil = stencil_multi[itype];
    const double *bindistsq = distsq_multi[itype];
    const double *icutsq = cutneighsq[itype];
    const int ns = nstencil_multi[itype];

    for (int k = 0; k < ns; k++) {
      for (int j = binhead[ibin+istencil[k]]; j >= 0; j = bins[j]) {
        if (j <= i) continue;

        const int jtype = type[j];
        if (icutsq[jtype] < bindistsq[k]) continue;

        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

        const double delx = xtmp - x[j][0];
        const double dely = ytmp - x[j][1];
        const double delz = ztmp - x[j][2];
        const double rsq = delx*delx + dely*dely + delz*delz;

        // icutsq[jtype] is cutneighsq[itype][jtype]
        // negated <= so that a NaN distance is skipped as well
        if (!(rsq <= icutsq[jtype])) continue;

        if (!molecular) {
          neighptr[n++] = j;
        } else {
          int which = 0;
          if (!moltemplate)
            which = find_special(special[i],nspecial[i],tag[j]);
          else if (imol >= 0)
            which = find_special(onemols[imol]->special[iatom],
                                 onemols[imol]->nspecial[iatom],
                                 tag[j]-tagprev);

          // which < 0 with a failed minimum image check stores nothing
          if (which == 0 || domain->minimum_image_check(delx,dely,delz))
            neighptr[n++] = j;
          else if (which > 0)
            neighptr[n++] = j ^ (which << SBBITS);
        }
      }
    }

    // publish the finished neighbor vector of atom i

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;

  list->inum = nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with full Newton's 3rd law
|
||||
each owned atom i checks its own bin and other bins in Newton stencil
|
||||
multi-type stencil is itype dependent and is distance checked
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::half_multi_newton_omp(NeighList *list)
{
  // bin local & ghost atoms when binatomflag is set

  if (binatomflag) bin_atoms();

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nlocal);

  // NOTE(review): tid, ifrom and ito are not declared in this function;
  // presumably NEIGH_OMP_SETUP supplies them - confirm in neighbor_omp.h

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int *nstencil_multi = list->nstencil_multi;
  int **stencil_multi = list->stencil_multi;
  double **distsq_multi = list->distsq_multi;

  // every thread allocates neighbors from its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  // this thread's share of the atoms

  for (int i = ifrom; i < ito; i++) {

    int n = 0;
    int *neighptr = ipage.vget();

    const int itype = type[i];
    const double xtmp = x[i][0];
    const double ytmp = x[i][1];
    const double ztmp = x[i][2];

    // template bookkeeping is only read when moltemplate is set

    int imol = 0, iatom = 0;
    tagint tagprev = 0;
    if (moltemplate) {
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // per-type row of the squared neighbor cutoffs

    const double *icutsq = cutneighsq[itype];

    // pass 1: remainder of i's own bin, ghosts sit at the end of the
    //   linked list
    // an owned j is always stored, since it comes after i in the list
    // a ghost j is stored only if its coords are "above and to the right"
    //   of i

    for (int j = bins[i]; j >= 0; j = bins[j]) {
      if (j >= nlocal) {
        const double *xj = x[j];
        if (xj[2] < ztmp) continue;
        if (xj[2] == ztmp) {
          if (xj[1] < ytmp) continue;
          if (xj[1] == ytmp && xj[0] < xtmp) continue;
        }
      }

      const int jtype = type[j];
      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

      const double delx = xtmp - x[j][0];
      const double dely = ytmp - x[j][1];
      const double delz = ztmp - x[j][2];
      const double rsq = delx*delx + dely*dely + delz*delz;

      // negated <= so that a NaN distance is skipped as well
      if (!(rsq <= icutsq[jtype])) continue;

      if (!molecular) {
        neighptr[n++] = j;
      } else {
        int which = 0;
        if (!moltemplate)
          which = find_special(special[i],nspecial[i],tag[j]);
        else if (imol >= 0)
          which = find_special(onemols[imol]->special[iatom],
                               onemols[imol]->nspecial[iatom],
                               tag[j]-tagprev);

        // which < 0 with a failed minimum image check stores nothing
        if (which == 0 || domain->minimum_image_check(delx,dely,delz))
          neighptr[n++] = j;
        else if (which > 0)
          neighptr[n++] = j ^ (which << SBBITS);
      }
    }

    // pass 2: all atoms in the other stencil bins, every pair is stored
    // the stencil and its bin distances depend on itype
    // a bin is skipped for j when the i,j cutoff is below the bin distance

    const int ibin = coord2bin(x[i]);
    const int *istencil = stencil_multi[itype];
    const double *bindistsq = distsq_multi[itype];
    const int ns = nstencil_multi[itype];

    for (int k = 0; k < ns; k++) {
      for (int j = binhead[ibin+istencil[k]]; j >= 0; j = bins[j]) {
        const int jtype = type[j];
        if (icutsq[jtype] < bindistsq[k]) continue;

        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

        const double delx = xtmp - x[j][0];
        const double dely = ytmp - x[j][1];
        const double delz = ztmp - x[j][2];
        const double rsq = delx*delx + dely*dely + delz*delz;

        if (!(rsq <= icutsq[jtype])) continue;

        if (!molecular) {
          neighptr[n++] = j;
        } else {
          int which = 0;
          if (!moltemplate)
            which = find_special(special[i],nspecial[i],tag[j]);
          else if (imol >= 0)
            which = find_special(onemols[imol]->special[iatom],
                                 onemols[imol]->nspecial[iatom],
                                 tag[j]-tagprev);

          if (which == 0 || domain->minimum_image_check(delx,dely,delz))
            neighptr[n++] = j;
          else if (which > 0)
            neighptr[n++] = j ^ (which << SBBITS);
        }
      }
    }

    // publish the finished neighbor vector of atom i

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;

  list->inum = nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction with Newton's 3rd law for triclinic
|
||||
each owned atom i checks its own bin and other bins in triclinic stencil
|
||||
multi-type stencil is itype dependent and is distance checked
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::half_multi_newton_tri_omp(NeighList *list)
{
  // bin local & ghost atoms when binatomflag is set

  if (binatomflag) bin_atoms();

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nlocal);

  // NOTE(review): tid, ifrom and ito are not declared in this function;
  // presumably NEIGH_OMP_SETUP supplies them - confirm in neighbor_omp.h

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int *nstencil_multi = list->nstencil_multi;
  int **stencil_multi = list->stencil_multi;
  double **distsq_multi = list->distsq_multi;

  // every thread allocates neighbors from its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  // this thread's share of the atoms

  for (int i = ifrom; i < ito; i++) {

    int n = 0;
    int *neighptr = ipage.vget();

    const int itype = type[i];
    const double xtmp = x[i][0];
    const double ytmp = x[i][1];
    const double ztmp = x[i][2];

    // template bookkeeping is only read when moltemplate is set

    int imol = 0, iatom = 0;
    tagint tagprev = 0;
    if (moltemplate) {
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // visit all atoms in all stencil bins, own bin included
    // the stencil, its bin distances and the cutoffs depend on itype
    // a bin is skipped for j when the i,j cutoff is below the bin distance
    // bins below self are excluded from the stencil
    // atoms j "below" i are rejected, where below means
    //   lower z, or equal z and lower y, or equal z,y and lower x,
    //   or equal z,y,x and j <= i
    // the last rule removes self-self pairs yet keeps superposed atoms

    const int ibin = coord2bin(x[i]);
    const int *istencil = stencil_multi[itype];
    const double *bindistsq = distsq_multi[itype];
    const double *icutsq = cutneighsq[itype];
    const int ns = nstencil_multi[itype];

    for (int k = 0; k < ns; k++) {
      for (int j = binhead[ibin+istencil[k]]; j >= 0; j = bins[j]) {
        const int jtype = type[j];
        if (icutsq[jtype] < bindistsq[k]) continue;

        const double *xj = x[j];
        if (xj[2] < ztmp) continue;
        if (xj[2] == ztmp) {
          if (xj[1] < ytmp) continue;
          if (xj[1] == ytmp) {
            if (xj[0] < xtmp) continue;
            if (xj[0] == xtmp && j <= i) continue;
          }
        }

        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

        const double delx = xtmp - xj[0];
        const double dely = ytmp - xj[1];
        const double delz = ztmp - xj[2];
        const double rsq = delx*delx + dely*dely + delz*delz;

        // icutsq[jtype] is cutneighsq[itype][jtype]
        // negated <= so that a NaN distance is skipped as well
        if (!(rsq <= icutsq[jtype])) continue;

        if (!molecular) {
          neighptr[n++] = j;
        } else {
          int which = 0;
          if (!moltemplate)
            which = find_special(special[i],nspecial[i],tag[j]);
          else if (imol >= 0)
            which = find_special(onemols[imol]->special[iatom],
                                 onemols[imol]->nspecial[iatom],
                                 tag[j]-tagprev);

          // which < 0 with a failed minimum image check stores nothing
          if (which == 0 || domain->minimum_image_check(delx,dely,delz))
            neighptr[n++] = j;
          else if (which > 0)
            neighptr[n++] = j ^ (which << SBBITS);
        }
      }
    }

    // publish the finished neighbor vector of atom i

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;

  list->inum = nlocal;
}
|
||||
@ -1,376 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "neighbor.h"
|
||||
#include "neighbor_omp.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "group.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
|
||||
pair stored once if i,j are both owned and i < j
|
||||
pair stored by me if j is ghost (also stored by proc owning j)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::half_nsq_no_newton_omp(NeighList *list)
{
  // with includegroup set, only the first atom->nfirst atoms get a
  // neighbor vector and only atoms matching the group bitmask are
  // accepted as neighbors

  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
  const int nall = atom->nlocal + atom->nghost;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nlocal);

  // NOTE(review): tid, ifrom and ito are not declared in this function;
  // presumably NEIGH_OMP_SETUP supplies them - confirm in neighbor_omp.h

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;

  // every thread allocates neighbors from its own page allocator

  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  // this thread's share of the owned atoms

  for (int i = ifrom; i < ito; i++) {

    int n = 0;
    int *neighptr = ipage.vget();

    const int itype = type[i];
    const double xtmp = x[i][0];
    const double ytmp = x[i][1];
    const double ztmp = x[i][2];

    // template bookkeeping is only read when moltemplate is set

    int imol = 0, iatom = 0;
    tagint tagprev = 0;
    if (moltemplate) {
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // scan all later atoms, owned and ghost
    // starting at i+1 stores a pair only if i < j

    for (int j = i+1; j < nall; j++) {
      if (includegroup && !(mask[j] & bitmask)) continue;

      const int jtype = type[j];
      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

      const double delx = xtmp - x[j][0];
      const double dely = ytmp - x[j][1];
      const double delz = ztmp - x[j][2];
      const double rsq = delx*delx + dely*dely + delz*delz;

      // negated <= so that a NaN distance is skipped as well
      if (!(rsq <= cutneighsq[itype][jtype])) continue;

      if (!molecular) {
        neighptr[n++] = j;
      } else {
        int which = 0;
        if (!moltemplate)
          which = find_special(special[i],nspecial[i],tag[j]);
        else if (imol >= 0)
          which = find_special(onemols[imol]->special[iatom],
                               onemols[imol]->nspecial[iatom],
                               tag[j]-tagprev);

        // which < 0 with a failed minimum image check stores nothing
        if (which == 0 || domain->minimum_image_check(delx,dely,delz))
          neighptr[n++] = j;
        else if (which > 0)
          neighptr[n++] = j ^ (which << SBBITS);
      }
    }

    // publish the finished neighbor vector of atom i

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;

  list->inum = nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
|
||||
include neighbors of ghost atoms, but no "special neighbors" for ghosts
|
||||
pair stored once if i,j are both owned and i < j
|
||||
pair stored by me if i owned and j ghost (also stored by proc owning j)
|
||||
pair stored once if i,j are both ghost and i < j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::half_nsq_no_newton_ghost_omp(NeighList *list)
{
  // Threaded N^2/2 half-list build without full Newton that also builds
  // neighbor lists for ghost atoms.
  //   list = neighbor list to fill: ilist, numneigh, firstneigh, inum and
  //          gnum are written, and one list->ipage[] entry is used per thread

  // with an include group only the first atom->nfirst atoms take the
  // "owned atom" branch (with special-neighbor handling) below
  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;

  // nall has to count every owned and ghost atom, independent of includegroup:
  // the outer loop fills ilist[] for indices 0..nall-1 and inum/gnum below are
  // reported relative to atom->nlocal, so deriving nall from the reduced
  // nlocal would truncate the loop and yield a wrong gnum
  const int nall = atom->nlocal + atom->nghost;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  // tid, ifrom and ito used below are not declared in this function;
  // they are expected to be provided by the NEIGH_OMP_* macros
  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nall);

  int i,j,n,itype,jtype,which,imol,iatom;
  tagint tagprev;
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  int *neighptr;

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;

  // each thread has its own page allocator
  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  // loop over owned & ghost atoms, storing neighbors

  for (i = ifrom; i < ito; i++) {

    n = 0;
    neighptr = ipage.vget();

    itype = type[i];
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
    if (moltemplate) {
      // per-atom template info, only needed for the template special lookup
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // loop over remaining atoms, owned and ghost
    // only store pair if i < j
    // stores own/own pairs only once
    // stores own/ghost pairs with owned atom only, on both procs
    // stores ghost/ghost pairs only once
    // no molecular test when i = ghost atom

    if (i < nlocal) {
      for (j = i+1; j < nall; j++) {
        if (includegroup && !(mask[j] & bitmask)) continue;
        jtype = type[j];
        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

        delx = xtmp - x[j][0];
        dely = ytmp - x[j][1];
        delz = ztmp - x[j][2];
        rsq = delx*delx + dely*dely + delz*delz;

        if (rsq <= cutneighsq[itype][jtype]) {
          if (molecular) {
            if (!moltemplate)
              which = find_special(special[i],nspecial[i],tag[j]);
            else if (imol >=0)
              which = find_special(onemols[imol]->special[iatom],
                                   onemols[imol]->nspecial[iatom],
                                   tag[j]-tagprev);
            else which = 0;

            // which == 0: store j unmodified
            // otherwise store j unmodified if minimum_image_check() is true,
            // else store j with 'which' encoded above SBBITS when which > 0;
            // nothing is stored when which < 0
            if (which == 0) neighptr[n++] = j;
            else if (domain->minimum_image_check(delx,dely,delz))
              neighptr[n++] = j;
            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
          } else neighptr[n++] = j;
        }
      }

    } else {
      // atoms at index >= nlocal: plain distance test only
      for (j = i+1; j < nall; j++) {
        if (includegroup && !(mask[j] & bitmask)) continue;
        jtype = type[j];
        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

        delx = xtmp - x[j][0];
        dely = ytmp - x[j][1];
        delz = ztmp - x[j][2];
        rsq = delx*delx + dely*dely + delz*delz;

        if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
      }
    }

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;
  list->inum = atom->nlocal;
  list->gnum = nall - atom->nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
N^2 / 2 search for neighbor pairs with full Newton's 3rd law
|
||||
every pair stored exactly once by some processor
|
||||
decision on ghost atoms based on itag,jtag tests
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::half_nsq_newton_omp(NeighList *list)
{
  // Threaded N^2/2 half-list build with full Newton.
  //   list = neighbor list to fill: ilist, numneigh, firstneigh and inum are
  //          written, and one list->ipage[] entry is used per thread

  // with an include group only the first atom->nfirst atoms get a list
  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  // tid, ifrom and ito used below are not declared in this function;
  // they are expected to be provided by the NEIGH_OMP_* macros
  NEIGH_OMP_INIT;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list)
#endif
  NEIGH_OMP_SETUP(nlocal);

  int i,j,n,itype,jtype,which,imol,iatom;
  // atom IDs come from the tagint array 'tag', so hold them in tagint
  // (not int) to avoid narrowing if tagint is wider than int
  tagint itag,jtag,tagprev;
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  int *neighptr;

  // loop over each atom, storing neighbors

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  int nall = atom->nlocal + atom->nghost;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;

  // each thread has its own page allocator
  MyPage<int> &ipage = list->ipage[tid];
  ipage.reset();

  for (i = ifrom; i < ito; i++) {

    n = 0;
    neighptr = ipage.vget();

    itag = tag[i];
    itype = type[i];
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
    if (moltemplate) {
      // per-atom template info, only needed for the template special lookup
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // loop over remaining atoms, owned and ghost
    // itag = jtag is possible for long cutoffs that include images of self

    for (j = i+1; j < nall; j++) {
      if (includegroup && !(mask[j] & bitmask)) continue;

      if (j >= nlocal) {
        // j is beyond the owned range: decide from the two IDs whether this
        // atom keeps the pair; the parity of itag+jtag selects which of the
        // two orderings is skipped
        jtag = tag[j];
        if (itag > jtag) {
          if ((itag+jtag) % 2 == 0) continue;
        } else if (itag < jtag) {
          if ((itag+jtag) % 2 == 1) continue;
        } else {
          // identical IDs: break the tie on coordinates, z first, then y, then x
          if (x[j][2] < ztmp) continue;
          if (x[j][2] == ztmp) {
            if (x[j][1] < ytmp) continue;
            if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
          }
        }
      }

      jtype = type[j];
      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
      rsq = delx*delx + dely*dely + delz*delz;

      if (rsq <= cutneighsq[itype][jtype]) {
        if (molecular) {
          if (!moltemplate)
            which = find_special(special[i],nspecial[i],tag[j]);
          else if (imol >=0)
            which = find_special(onemols[imol]->special[iatom],
                                 onemols[imol]->nspecial[iatom],
                                 tag[j]-tagprev);
          else which = 0;

          // which == 0: store j unmodified
          // otherwise store j unmodified if minimum_image_check() is true,
          // else store j with 'which' encoded above SBBITS when which > 0;
          // nothing is stored when which < 0
          if (which == 0) neighptr[n++] = j;
          else if (domain->minimum_image_check(delx,dely,delz))
            neighptr[n++] = j;
          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
        } else neighptr[n++] = j;
      }
    }

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
  }
  NEIGH_OMP_CLOSE;
  list->inum = nlocal;
}
|
||||
@ -1,972 +0,0 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "neighbor.h"
|
||||
#include "neighbor_omp.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "group.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
multiple respa lists
|
||||
N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
|
||||
pair added to list if atoms i and j are both owned and i < j
|
||||
pair added if j is ghost (also stored by proc owning j)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::respa_nsq_no_newton_omp(NeighList *list)
{
  // Threaded N^2/2 build of the rRESPA lists without full Newton.
  //   list = outer neighbor list; list->listinner and, if list->respamiddle
  //          is set, list->listmiddle are filled in the same pass

  // with an include group only the first atom->nfirst atoms get a list
  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  // tid, ifrom and ito used below are not declared in this function;
  // they are expected to be provided by the NEIGH_OMP_* macros
  NEIGH_OMP_INIT;

  NeighList *listinner = list->listinner;
  NeighList *listmiddle = list->listmiddle;
  const int respamiddle = list->respamiddle;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list,listinner,listmiddle)
#endif
  NEIGH_OMP_SETUP(nlocal);

  int i,j,n,itype,jtype,n_inner,n_middle,imol,iatom;
  tagint tagprev;
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  int *neighptr,*neighptr_inner,*neighptr_middle;

  // loop over each atom, storing neighbors

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  int nall = atom->nlocal + atom->nghost;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;

  int *ilist_inner = listinner->ilist;
  int *numneigh_inner = listinner->numneigh;
  int **firstneigh_inner = listinner->firstneigh;

  // the middle list is only touched when respamiddle is set
  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
  if (respamiddle) {
    ilist_middle = listmiddle->ilist;
    numneigh_middle = listmiddle->numneigh;
    firstneigh_middle = listmiddle->firstneigh;
  }

  // each thread has its own page allocator
  MyPage<int> &ipage = list->ipage[tid];
  MyPage<int> &ipage_inner = listinner->ipage[tid];
  ipage.reset();
  ipage_inner.reset();

  MyPage<int> *ipage_middle;
  if (respamiddle) {
    ipage_middle = listmiddle->ipage + tid;
    ipage_middle->reset();
  }

  // 'which' stays 0 for non-molecular systems, so the inner/middle
  // tests below then always store j unmodified
  int which = 0;
  int minchange = 0;

  for (i = ifrom; i < ito; i++) {

    n = n_inner = 0;
    neighptr = ipage.vget();
    neighptr_inner = ipage_inner.vget();
    if (respamiddle) {
      n_middle = 0;
      neighptr_middle = ipage_middle->vget();
    }

    itype = type[i];
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
    if (moltemplate) {
      // per-atom template info, only needed for the template special lookup
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // loop over remaining atoms, owned and ghost

    for (j = i+1; j < nall; j++) {
      if (includegroup && !(mask[j] & bitmask)) continue;
      jtype = type[j];
      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
      rsq = delx*delx + dely*dely + delz*delz;

      if (rsq <= cutneighsq[itype][jtype]) {
        if (molecular) {
          if (!moltemplate)
            which = find_special(special[i],nspecial[i],tag[j]);
          else if (imol >=0)
            which = find_special(onemols[imol]->special[iatom],
                                 onemols[imol]->nspecial[iatom],
                                 tag[j]-tagprev);
          else which = 0;

          // minchange caches the minimum_image_check() result so the
          // inner/middle lists below reuse the same decision
          if (which == 0) neighptr[n++] = j;
          else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
            neighptr[n++] = j;
          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
        } else neighptr[n++] = j;

        // pair also goes into the inner list when inside cut_inner_sq
        if (rsq < cut_inner_sq) {
          if (which == 0) neighptr_inner[n_inner++] = j;
          else if (minchange) neighptr_inner[n_inner++] = j;
          else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS);
        }

        // pair also goes into the middle list when strictly between
        // cut_middle_inside_sq and cut_middle_sq
        if (respamiddle && rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
          if (which == 0) neighptr_middle[n_middle++] = j;
          else if (minchange) neighptr_middle[n_middle++] = j;
          else if (which > 0)
            neighptr_middle[n_middle++] = j ^ (which << SBBITS);
        }
      }
    }

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");

    ilist_inner[i] = i;
    firstneigh_inner[i] = neighptr_inner;
    numneigh_inner[i] = n_inner;
    // commit the inner entries on the inner page: neighptr_inner came from
    // ipage_inner.vget() and ipage_inner.status() is what is checked next
    ipage_inner.vgot(n_inner);
    if (ipage_inner.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");

    if (respamiddle) {
      ilist_middle[i] = i;
      firstneigh_middle[i] = neighptr_middle;
      numneigh_middle[i] = n_middle;
      ipage_middle->vgot(n_middle);
      if (ipage_middle->status())
        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
    }
  }
  NEIGH_OMP_CLOSE;
  list->inum = nlocal;
  listinner->inum = nlocal;
  if (respamiddle) listmiddle->inum = nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
multiple respa lists
|
||||
N^2 / 2 search for neighbor pairs with full Newton's 3rd law
|
||||
pair added to list if atoms i and j are both owned and i < j
|
||||
if j is ghost only me or other proc adds pair
|
||||
decision based on itag,jtag tests
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::respa_nsq_newton_omp(NeighList *list)
{
  // Threaded N^2/2 build of the rRESPA lists with full Newton.
  //   list = outer neighbor list; list->listinner and, if list->respamiddle
  //          is set, list->listmiddle are filled in the same pass

  // with an include group only the first atom->nfirst atoms get a list
  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  // tid, ifrom and ito used below are not declared in this function;
  // they are expected to be provided by the NEIGH_OMP_* macros
  NEIGH_OMP_INIT;

  NeighList *listinner = list->listinner;
  NeighList *listmiddle = list->listmiddle;
  const int respamiddle = list->respamiddle;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list,listinner,listmiddle)
#endif
  NEIGH_OMP_SETUP(nlocal);

  int i,j,n,itype,jtype,n_inner,n_middle,imol,iatom;
  // atom IDs come from the tagint array 'tag', so hold them in tagint
  // (not int) to avoid narrowing if tagint is wider than int
  tagint itag,jtag,tagprev;
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  int *neighptr,*neighptr_inner,*neighptr_middle;

  // loop over each atom, storing neighbors

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  int nall = atom->nlocal + atom->nghost;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;

  int *ilist_inner = listinner->ilist;
  int *numneigh_inner = listinner->numneigh;
  int **firstneigh_inner = listinner->firstneigh;

  // the middle list is only touched when respamiddle is set
  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
  if (respamiddle) {
    ilist_middle = listmiddle->ilist;
    numneigh_middle = listmiddle->numneigh;
    firstneigh_middle = listmiddle->firstneigh;
  }

  // each thread has its own page allocator
  MyPage<int> &ipage = list->ipage[tid];
  MyPage<int> &ipage_inner = listinner->ipage[tid];
  ipage.reset();
  ipage_inner.reset();

  MyPage<int> *ipage_middle;
  if (respamiddle) {
    ipage_middle = listmiddle->ipage + tid;
    ipage_middle->reset();
  }

  // 'which' stays 0 for non-molecular systems, so the inner/middle
  // tests below then always store j unmodified
  int which = 0;
  int minchange = 0;

  for (i = ifrom; i < ito; i++) {

    n = n_inner = 0;
    neighptr = ipage.vget();
    neighptr_inner = ipage_inner.vget();
    if (respamiddle) {
      n_middle = 0;
      neighptr_middle = ipage_middle->vget();
    }

    itag = tag[i];
    itype = type[i];
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
    if (moltemplate) {
      // per-atom template info, only needed for the template special lookup
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // loop over remaining atoms, owned and ghost

    for (j = i+1; j < nall; j++) {
      if (includegroup && !(mask[j] & bitmask)) continue;

      if (j >= nlocal) {
        // j is beyond the owned range: decide from the two IDs whether this
        // atom keeps the pair; the parity of itag+jtag selects which of the
        // two orderings is skipped
        jtag = tag[j];
        if (itag > jtag) {
          if ((itag+jtag) % 2 == 0) continue;
        } else if (itag < jtag) {
          if ((itag+jtag) % 2 == 1) continue;
        } else {
          // identical IDs: break the tie on coordinates, z first, then y, then x
          if (x[j][2] < ztmp) continue;
          if (x[j][2] == ztmp) {
            if (x[j][1] < ytmp) continue;
            if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
          }
        }
      }

      jtype = type[j];
      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

      delx = xtmp - x[j][0];
      dely = ytmp - x[j][1];
      delz = ztmp - x[j][2];
      rsq = delx*delx + dely*dely + delz*delz;

      if (rsq <= cutneighsq[itype][jtype]) {
        if (molecular) {
          if (!moltemplate)
            which = find_special(special[i],nspecial[i],tag[j]);
          else if (imol >=0)
            which = find_special(onemols[imol]->special[iatom],
                                 onemols[imol]->nspecial[iatom],
                                 tag[j]-tagprev);
          else which = 0;

          // minchange caches the minimum_image_check() result so the
          // inner/middle lists below reuse the same decision
          if (which == 0) neighptr[n++] = j;
          else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
            neighptr[n++] = j;
          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
        } else neighptr[n++] = j;

        // pair also goes into the inner list when inside cut_inner_sq
        if (rsq < cut_inner_sq) {
          if (which == 0) neighptr_inner[n_inner++] = j;
          else if (minchange) neighptr_inner[n_inner++] = j;
          else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS);
        }

        // pair also goes into the middle list when strictly between
        // cut_middle_inside_sq and cut_middle_sq
        if (respamiddle &&
            rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
          if (which == 0) neighptr_middle[n_middle++] = j;
          else if (minchange) neighptr_middle[n_middle++] = j;
          else if (which > 0)
            neighptr_middle[n_middle++] = j ^ (which << SBBITS);
        }
      }
    }

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");

    ilist_inner[i] = i;
    firstneigh_inner[i] = neighptr_inner;
    numneigh_inner[i] = n_inner;
    // commit the inner entries on the inner page: neighptr_inner came from
    // ipage_inner.vget() and ipage_inner.status() is what is checked next
    ipage_inner.vgot(n_inner);
    if (ipage_inner.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");

    if (respamiddle) {
      ilist_middle[i] = i;
      firstneigh_middle[i] = neighptr_middle;
      numneigh_middle[i] = n_middle;
      ipage_middle->vgot(n_middle);
      if (ipage_middle->status())
        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
    }
  }
  NEIGH_OMP_CLOSE;
  list->inum = nlocal;
  listinner->inum = nlocal;
  if (respamiddle) listmiddle->inum = nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
multiple respa lists
|
||||
binned neighbor list construction with partial Newton's 3rd law
|
||||
each owned atom i checks own bin and surrounding bins in non-Newton stencil
|
||||
pair stored once if i,j are both owned and i < j
|
||||
pair stored by me if j is ghost (also stored by proc owning j)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::respa_bin_no_newton_omp(NeighList *list)
{
  // Threaded binned build of the rRESPA lists without full Newton.
  //   list = outer neighbor list (also supplies the stencil);
  //          list->listinner and, if list->respamiddle is set,
  //          list->listmiddle are filled in the same pass

  // bin local & ghost atoms

  if (binatomflag) bin_atoms();

  // with an include group only the first atom->nfirst atoms get a list
  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
  const int molecular = atom->molecular;
  const int moltemplate = (molecular == 2) ? 1 : 0;

  // tid, ifrom and ito used below are not declared in this function;
  // they are expected to be provided by the NEIGH_OMP_* macros
  NEIGH_OMP_INIT;

  NeighList *listinner = list->listinner;
  NeighList *listmiddle = list->listmiddle;
  const int respamiddle = list->respamiddle;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(list,listinner,listmiddle)
#endif
  NEIGH_OMP_SETUP(nlocal);

  int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom;
  tagint tagprev;
  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
  int *neighptr,*neighptr_inner,*neighptr_middle;

  // loop over each atom, storing neighbors

  double **x = atom->x;
  int *type = atom->type;
  int *mask = atom->mask;
  tagint *tag = atom->tag;
  tagint *molecule = atom->molecule;
  tagint **special = atom->special;
  int **nspecial = atom->nspecial;

  int *molindex = atom->molindex;
  int *molatom = atom->molatom;
  Molecule **onemols = atom->avec->onemols;

  int *ilist = list->ilist;
  int *numneigh = list->numneigh;
  int **firstneigh = list->firstneigh;
  int nstencil = list->nstencil;
  int *stencil = list->stencil;

  int *ilist_inner = listinner->ilist;
  int *numneigh_inner = listinner->numneigh;
  int **firstneigh_inner = listinner->firstneigh;

  // the middle list is only touched when respamiddle is set
  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
  if (respamiddle) {
    ilist_middle = listmiddle->ilist;
    numneigh_middle = listmiddle->numneigh;
    firstneigh_middle = listmiddle->firstneigh;
  }

  // each thread has its own page allocator
  MyPage<int> &ipage = list->ipage[tid];
  MyPage<int> &ipage_inner = listinner->ipage[tid];
  ipage.reset();
  ipage_inner.reset();

  MyPage<int> *ipage_middle;
  if (respamiddle) {
    ipage_middle = listmiddle->ipage + tid;
    ipage_middle->reset();
  }

  // 'which' stays 0 for non-molecular systems, so the inner/middle
  // tests below then always store j unmodified
  int which = 0;
  int minchange = 0;

  for (i = ifrom; i < ito; i++) {

    n = n_inner = 0;
    neighptr = ipage.vget();
    neighptr_inner = ipage_inner.vget();
    if (respamiddle) {
      n_middle = 0;
      neighptr_middle = ipage_middle->vget();
    }

    itype = type[i];
    xtmp = x[i][0];
    ytmp = x[i][1];
    ztmp = x[i][2];
    ibin = coord2bin(x[i]);
    if (moltemplate) {
      // per-atom template info, only needed for the template special lookup
      imol = molindex[i];
      iatom = molatom[i];
      tagprev = tag[i] - iatom - 1;
    }

    // loop over all atoms in surrounding bins in stencil including self
    // only store pair if i < j
    // stores own/own pairs only once
    // stores own/ghost pairs on both procs

    for (k = 0; k < nstencil; k++) {
      // walk the linked list of atoms in this stencil bin
      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
        if (j <= i) continue;

        jtype = type[j];
        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;

        delx = xtmp - x[j][0];
        dely = ytmp - x[j][1];
        delz = ztmp - x[j][2];
        rsq = delx*delx + dely*dely + delz*delz;

        if (rsq <= cutneighsq[itype][jtype]) {
          if (molecular) {
            if (!moltemplate)
              which = find_special(special[i],nspecial[i],tag[j]);
            else if (imol >=0)
              which = find_special(onemols[imol]->special[iatom],
                                   onemols[imol]->nspecial[iatom],
                                   tag[j]-tagprev);
            else which = 0;

            // minchange caches the minimum_image_check() result so the
            // inner/middle lists below reuse the same decision
            if (which == 0) neighptr[n++] = j;
            else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
              neighptr[n++] = j;
            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
          } else neighptr[n++] = j;

          // pair also goes into the inner list when inside cut_inner_sq
          if (rsq < cut_inner_sq) {
            if (which == 0) neighptr_inner[n_inner++] = j;
            else if (minchange) neighptr_inner[n_inner++] = j;
            else if (which > 0)
              neighptr_inner[n_inner++] = j ^ (which << SBBITS);
          }

          // pair also goes into the middle list when strictly between
          // cut_middle_inside_sq and cut_middle_sq
          if (respamiddle &&
              rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
            if (which == 0) neighptr_middle[n_middle++] = j;
            else if (minchange) neighptr_middle[n_middle++] = j;
            else if (which > 0)
              neighptr_middle[n_middle++] = j ^ (which << SBBITS);
          }
        }
      }
    }

    ilist[i] = i;
    firstneigh[i] = neighptr;
    numneigh[i] = n;
    ipage.vgot(n);
    if (ipage.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");

    ilist_inner[i] = i;
    firstneigh_inner[i] = neighptr_inner;
    numneigh_inner[i] = n_inner;
    // commit the inner entries on the inner page: neighptr_inner came from
    // ipage_inner.vget() and ipage_inner.status() is what is checked next
    ipage_inner.vgot(n_inner);
    if (ipage_inner.status())
      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");

    if (respamiddle) {
      ilist_middle[i] = i;
      firstneigh_middle[i] = neighptr_middle;
      numneigh_middle[i] = n_middle;
      ipage_middle->vgot(n_middle);
      if (ipage_middle->status())
        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
    }
  }
  NEIGH_OMP_CLOSE;
  list->inum = nlocal;
  listinner->inum = nlocal;
  if (respamiddle) listmiddle->inum = nlocal;
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
multiple respa lists
|
||||
binned neighbor list construction with full Newton's 3rd law
|
||||
each owned atom i checks its own bin and other bins in Newton stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::respa_bin_newton_omp(NeighList *list)
|
||||
{
|
||||
// bin local & ghost atoms
|
||||
|
||||
if (binatomflag) bin_atoms();
|
||||
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
const int molecular = atom->molecular;
|
||||
const int moltemplate = (molecular == 2) ? 1 : 0;
|
||||
|
||||
NEIGH_OMP_INIT;
|
||||
|
||||
NeighList *listinner = list->listinner;
|
||||
NeighList *listmiddle = list->listmiddle;
|
||||
const int respamiddle = list->respamiddle;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(list,listinner,listmiddle)
|
||||
#endif
|
||||
NEIGH_OMP_SETUP(nlocal);
|
||||
|
||||
int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom;
|
||||
tagint tagprev;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
|
||||
int *neighptr,*neighptr_inner,*neighptr_middle;
|
||||
|
||||
// loop over each atom, storing neighbors
|
||||
|
||||
double **x = atom->x;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
tagint *tag = atom->tag;
|
||||
tagint *molecule = atom->molecule;
|
||||
tagint **special = atom->special;
|
||||
int **nspecial = atom->nspecial;
|
||||
|
||||
int *molindex = atom->molindex;
|
||||
int *molatom = atom->molatom;
|
||||
Molecule **onemols = atom->avec->onemols;
|
||||
|
||||
int *ilist = list->ilist;
|
||||
int *numneigh = list->numneigh;
|
||||
int **firstneigh = list->firstneigh;
|
||||
int nstencil = list->nstencil;
|
||||
int *stencil = list->stencil;
|
||||
|
||||
int *ilist_inner = listinner->ilist;
|
||||
int *numneigh_inner = listinner->numneigh;
|
||||
int **firstneigh_inner = listinner->firstneigh;
|
||||
|
||||
int *ilist_middle,*numneigh_middle,**firstneigh_middle;
|
||||
if (respamiddle) {
|
||||
ilist_middle = listmiddle->ilist;
|
||||
numneigh_middle = listmiddle->numneigh;
|
||||
firstneigh_middle = listmiddle->firstneigh;
|
||||
}
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
MyPage<int> &ipage_inner = listinner->ipage[tid];
|
||||
ipage.reset();
|
||||
ipage_inner.reset();
|
||||
|
||||
MyPage<int> *ipage_middle;
|
||||
if (respamiddle) {
|
||||
ipage_middle = listmiddle->ipage + tid;
|
||||
ipage_middle->reset();
|
||||
}
|
||||
|
||||
int which = 0;
|
||||
int minchange = 0;
|
||||
|
||||
for (i = ifrom; i < ito; i++) {
|
||||
|
||||
n = n_inner = 0;
|
||||
neighptr = ipage.vget();
|
||||
neighptr_inner = ipage_inner.vget();
|
||||
if (respamiddle) {
|
||||
n_middle = 0;
|
||||
neighptr_middle = ipage_middle->vget();
|
||||
}
|
||||
|
||||
itype = type[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
if (moltemplate) {
|
||||
imol = molindex[i];
|
||||
iatom = molatom[i];
|
||||
tagprev = tag[i] - iatom - 1;
|
||||
}
|
||||
|
||||
// loop over rest of atoms in i's bin, ghosts are at end of linked list
|
||||
// if j is owned atom, store it, since j is beyond i in linked list
|
||||
// if j is ghost, only store if j coords are "above and to the right" of i
|
||||
|
||||
for (j = bins[i]; j >= 0; j = bins[j]) {
|
||||
if (j >= nlocal) {
|
||||
if (x[j][2] < ztmp) continue;
|
||||
if (x[j][2] == ztmp) {
|
||||
if (x[j][1] < ytmp) continue;
|
||||
if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
|
||||
}
|
||||
}
|
||||
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >=0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
|
||||
if (rsq < cut_inner_sq) {
|
||||
if (which == 0) neighptr_inner[n_inner++] = j;
|
||||
else if (minchange) neighptr_inner[n_inner++] = j;
|
||||
else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS);
|
||||
}
|
||||
|
||||
if (respamiddle &&
|
||||
rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
|
||||
if (which == 0) neighptr_middle[n_middle++] = j;
|
||||
else if (minchange) neighptr_middle[n_middle++] = j;
|
||||
else if (which > 0)
|
||||
neighptr_middle[n_middle++] = j ^ (which << SBBITS);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// loop over all atoms in other bins in stencil, store every pair
|
||||
|
||||
ibin = coord2bin(x[i]);
|
||||
for (k = 0; k < nstencil; k++) {
|
||||
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >=0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
|
||||
if (rsq < cut_inner_sq) {
|
||||
if (which == 0) neighptr_inner[n_inner++] = j;
|
||||
else if (minchange) neighptr_inner[n_inner++] = j;
|
||||
else if (which > 0)
|
||||
neighptr_inner[n_inner++] = j ^ (which << SBBITS);
|
||||
}
|
||||
|
||||
if (respamiddle &&
|
||||
rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
|
||||
if (which == 0) neighptr_middle[n_middle++] = j;
|
||||
else if (minchange) neighptr_middle[n_middle++] = j;
|
||||
else if (which > 0)
|
||||
neighptr_middle[n_middle++] = j ^ (which << SBBITS);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ilist[i] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
|
||||
ilist_inner[i] = i;
|
||||
firstneigh_inner[i] = neighptr_inner;
|
||||
numneigh_inner[i] = n_inner;
|
||||
ipage.vgot(n_inner);
|
||||
if (ipage_inner.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
|
||||
if (respamiddle) {
|
||||
ilist_middle[i] = i;
|
||||
firstneigh_middle[i] = neighptr_middle;
|
||||
numneigh_middle[i] = n_middle;
|
||||
ipage_middle->vgot(n_middle);
|
||||
if (ipage_middle->status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
}
|
||||
NEIGH_OMP_CLOSE;
|
||||
list->inum = nlocal;
|
||||
listinner->inum = nlocal;
|
||||
if (respamiddle) listmiddle->inum = nlocal;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
multiple respa lists
|
||||
binned neighbor list construction with Newton's 3rd law for triclinic
|
||||
each owned atom i checks its own bin and other bins in triclinic stencil
|
||||
every pair stored exactly once by some processor
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void Neighbor::respa_bin_newton_tri_omp(NeighList *list)
|
||||
{
|
||||
// bin local & ghost atoms
|
||||
|
||||
if (binatomflag) bin_atoms();
|
||||
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
const int molecular = atom->molecular;
|
||||
const int moltemplate = (molecular == 2) ? 1 : 0;
|
||||
|
||||
NEIGH_OMP_INIT;
|
||||
|
||||
NeighList *listinner = list->listinner;
|
||||
NeighList *listmiddle = list->listmiddle;
|
||||
const int respamiddle = list->respamiddle;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(list,listinner,listmiddle)
|
||||
#endif
|
||||
NEIGH_OMP_SETUP(nlocal);
|
||||
|
||||
int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom;
|
||||
tagint tagprev;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
|
||||
int *neighptr,*neighptr_inner,*neighptr_middle;
|
||||
|
||||
// loop over each atom, storing neighbors
|
||||
|
||||
double **x = atom->x;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
tagint *tag = atom->tag;
|
||||
tagint *molecule = atom->molecule;
|
||||
tagint **special = atom->special;
|
||||
int **nspecial = atom->nspecial;
|
||||
|
||||
int *molindex = atom->molindex;
|
||||
int *molatom = atom->molatom;
|
||||
Molecule **onemols = atom->avec->onemols;
|
||||
|
||||
int *ilist = list->ilist;
|
||||
int *numneigh = list->numneigh;
|
||||
int **firstneigh = list->firstneigh;
|
||||
int nstencil = list->nstencil;
|
||||
int *stencil = list->stencil;
|
||||
|
||||
int *ilist_inner = listinner->ilist;
|
||||
int *numneigh_inner = listinner->numneigh;
|
||||
int **firstneigh_inner = listinner->firstneigh;
|
||||
|
||||
int *ilist_middle,*numneigh_middle,**firstneigh_middle;
|
||||
if (respamiddle) {
|
||||
ilist_middle = listmiddle->ilist;
|
||||
numneigh_middle = listmiddle->numneigh;
|
||||
firstneigh_middle = listmiddle->firstneigh;
|
||||
}
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
MyPage<int> &ipage_inner = listinner->ipage[tid];
|
||||
ipage.reset();
|
||||
ipage_inner.reset();
|
||||
|
||||
MyPage<int> *ipage_middle;
|
||||
if (respamiddle) {
|
||||
ipage_middle = listmiddle->ipage + tid;
|
||||
ipage_middle->reset();
|
||||
}
|
||||
|
||||
int which = 0;
|
||||
int minchange = 0;
|
||||
|
||||
for (i = ifrom; i < ito; i++) {
|
||||
|
||||
n = n_inner = 0;
|
||||
neighptr = ipage.vget();
|
||||
neighptr_inner = ipage_inner.vget();
|
||||
if (respamiddle) {
|
||||
n_middle = 0;
|
||||
neighptr_middle = ipage_middle->vget();
|
||||
}
|
||||
|
||||
itype = type[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
if (moltemplate) {
|
||||
imol = molindex[i];
|
||||
iatom = molatom[i];
|
||||
tagprev = tag[i] - iatom - 1;
|
||||
}
|
||||
|
||||
// loop over all atoms in bins in stencil
|
||||
// pairs for atoms j "below" i are excluded
|
||||
// below = lower z or (equal z and lower y) or (equal zy and lower x)
|
||||
// (equal zyx and j <= i)
|
||||
// latter excludes self-self interaction but allows superposed atoms
|
||||
|
||||
ibin = coord2bin(x[i]);
|
||||
for (k = 0; k < nstencil; k++) {
|
||||
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
|
||||
if (x[j][2] < ztmp) continue;
|
||||
if (x[j][2] == ztmp) {
|
||||
if (x[j][1] < ytmp) continue;
|
||||
if (x[j][1] == ytmp) {
|
||||
if (x[j][0] < xtmp) continue;
|
||||
if (x[j][0] == xtmp && j <= i) continue;
|
||||
}
|
||||
}
|
||||
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >=0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
|
||||
if (rsq < cut_inner_sq) {
|
||||
if (which == 0) neighptr_inner[n_inner++] = j;
|
||||
else if (minchange) neighptr_inner[n_inner++] = j;
|
||||
else if (which > 0)
|
||||
neighptr_inner[n_inner++] = j ^ (which << SBBITS);
|
||||
}
|
||||
|
||||
if (respamiddle &&
|
||||
rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
|
||||
if (which == 0) neighptr_middle[n_middle++] = j;
|
||||
else if (minchange) neighptr_middle[n_middle++] = j;
|
||||
else if (which > 0)
|
||||
neighptr_middle[n_middle++] = j ^ (which << SBBITS);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ilist[i] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
|
||||
ilist_inner[i] = i;
|
||||
firstneigh_inner[i] = neighptr_inner;
|
||||
numneigh_inner[i] = n_inner;
|
||||
ipage.vgot(n_inner);
|
||||
if (ipage_inner.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
|
||||
if (respamiddle) {
|
||||
ilist_middle[i] = i;
|
||||
firstneigh_middle[i] = neighptr_middle;
|
||||
numneigh_middle[i] = n_middle;
|
||||
ipage_middle->vgot(n_middle);
|
||||
if (ipage_middle->status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
}
|
||||
NEIGH_OMP_CLOSE;
|
||||
list->inum = nlocal;
|
||||
listinner->inum = nlocal;
|
||||
if (respamiddle) listmiddle->inum = nlocal;
|
||||
}
|
||||
166
src/USER-OMP/npair_full_bin_ghost_omp.cpp
Normal file
166
src/USER-OMP/npair_full_bin_ghost_omp.cpp
Normal file
@ -0,0 +1,166 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_full_bin_ghost_omp.h"
|
||||
#include "npair_omp.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "domain.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace NeighConst;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// constructor: only forwards the LAMMPS instance pointer to the NPair base

NPairFullBinGhostOmp::NPairFullBinGhostOmp(LAMMPS *lmp) :
  NPair(lmp)
{
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction for all neighbors
|
||||
include neighbors of ghost atoms, but no "special neighbors" for ghosts
|
||||
every neighbor pair appears in list of both atoms i and j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairFullBinGhostOmp::build(NeighList *list)
|
||||
{
|
||||
const int nlocal = atom->nlocal;
|
||||
const int nall = nlocal + atom->nghost;
|
||||
const int molecular = atom->molecular;
|
||||
const int moltemplate = (molecular == 2) ? 1 : 0;
|
||||
|
||||
NPAIR_OMP_INIT;
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(list)
|
||||
#endif
|
||||
NPAIR_OMP_SETUP(nall);
|
||||
|
||||
int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
|
||||
tagint tagprev;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
|
||||
int xbin,ybin,zbin,xbin2,ybin2,zbin2;
|
||||
int *neighptr;
|
||||
|
||||
double **x = atom->x;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
tagint *tag = atom->tag;
|
||||
tagint *molecule = atom->molecule;
|
||||
tagint **special = atom->special;
|
||||
int **nspecial = atom->nspecial;
|
||||
|
||||
int *molindex = atom->molindex;
|
||||
int *molatom = atom->molatom;
|
||||
Molecule **onemols = atom->avec->onemols;
|
||||
|
||||
int *ilist = list->ilist;
|
||||
int *numneigh = list->numneigh;
|
||||
int **firstneigh = list->firstneigh;
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over owned & ghost atoms, storing neighbors
|
||||
|
||||
for (i = ifrom; i < ito; i++) {
|
||||
|
||||
n = 0;
|
||||
neighptr = ipage.vget();
|
||||
|
||||
itype = type[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
if (moltemplate) {
|
||||
imol = molindex[i];
|
||||
iatom = molatom[i];
|
||||
tagprev = tag[i] - iatom - 1;
|
||||
}
|
||||
|
||||
// loop over all atoms in surrounding bins in stencil including self
|
||||
// when i is a ghost atom, must check if stencil bin is out of bounds
|
||||
// skip i = j
|
||||
// no molecular test when i = ghost atom
|
||||
|
||||
if (i < nlocal) {
|
||||
ibin = coord2bin(x[i]);
|
||||
for (k = 0; k < nstencil; k++) {
|
||||
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
|
||||
if (i == j) continue;
|
||||
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >=0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if (domain->minimum_image_check(delx,dely,delz))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
ibin = coord2bin(x[i],xbin,ybin,zbin);
|
||||
for (k = 0; k < nstencil; k++) {
|
||||
xbin2 = xbin + stencilxyz[k][0];
|
||||
ybin2 = ybin + stencilxyz[k][1];
|
||||
zbin2 = zbin + stencilxyz[k][2];
|
||||
if (xbin2 < 0 || xbin2 >= mbinx ||
|
||||
ybin2 < 0 || ybin2 >= mbiny ||
|
||||
zbin2 < 0 || zbin2 >= mbinz) continue;
|
||||
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
|
||||
if (i == j) continue;
|
||||
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ilist[i] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
NPAIR_OMP_CLOSE;
|
||||
list->inum = nlocal;
|
||||
list->gnum = nall - nlocal;
|
||||
}
|
||||
44
src/USER-OMP/npair_full_bin_ghost_omp.h
Normal file
44
src/USER-OMP/npair_full_bin_ghost_omp.h
Normal file
@ -0,0 +1,44 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS

// style registration: style name, implementing class, OR-ed NP_* flag mask

NPairStyle(full/bin/ghost/omp,
           NPairFullBinGhostOmp,
           NP_FULL | NP_BIN | NP_GHOST | NP_OMP | NP_NEWTON | NP_NEWTOFF |
           NP_ORTHO | NP_TRI)

#else

#ifndef LMP_NPAIR_FULL_BIN_GHOST_OMP_H
#define LMP_NPAIR_FULL_BIN_GHOST_OMP_H

#include "npair.h"

namespace LAMMPS_NS {

// NPair subclass registered under the style name full/bin/ghost/omp

class NPairFullBinGhostOmp : public NPair {
 public:
  NPairFullBinGhostOmp(class LAMMPS *lmp);
  ~NPairFullBinGhostOmp() {}

  // fill the given neighbor list
  void build(class NeighList *list);
};

}

#endif
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
135
src/USER-OMP/npair_full_bin_omp.cpp
Normal file
135
src/USER-OMP/npair_full_bin_omp.cpp
Normal file
@ -0,0 +1,135 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_full_bin_omp.h"
|
||||
#include "npair_omp.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "domain.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace NeighConst;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// constructor: only forwards the LAMMPS instance pointer to the NPair base

NPairFullBinOmp::NPairFullBinOmp(LAMMPS *lmp) :
  NPair(lmp)
{
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction for all neighbors
|
||||
every neighbor pair appears in list of both atoms i and j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairFullBinOmp::build(NeighList *list)
|
||||
{
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
const int molecular = atom->molecular;
|
||||
const int moltemplate = (molecular == 2) ? 1 : 0;
|
||||
|
||||
NPAIR_OMP_INIT;
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(list)
|
||||
#endif
|
||||
NPAIR_OMP_SETUP(nlocal);
|
||||
|
||||
int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
|
||||
tagint tagprev;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
|
||||
int *neighptr;
|
||||
|
||||
double **x = atom->x;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
tagint *tag = atom->tag;
|
||||
tagint *molecule = atom->molecule;
|
||||
tagint **special = atom->special;
|
||||
int **nspecial = atom->nspecial;
|
||||
int *molindex = atom->molindex;
|
||||
int *molatom = atom->molatom;
|
||||
Molecule **onemols = atom->avec->onemols;
|
||||
|
||||
int *ilist = list->ilist;
|
||||
int *numneigh = list->numneigh;
|
||||
int **firstneigh = list->firstneigh;
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over owned atoms, storing neighbors
|
||||
|
||||
for (i = ifrom; i < ito; i++) {
|
||||
|
||||
n = 0;
|
||||
neighptr = ipage.vget();
|
||||
|
||||
itype = type[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
if (moltemplate) {
|
||||
imol = molindex[i];
|
||||
iatom = molatom[i];
|
||||
tagprev = tag[i] - iatom - 1;
|
||||
}
|
||||
|
||||
// loop over all atoms in surrounding bins in stencil including self
|
||||
// skip i = j
|
||||
|
||||
ibin = coord2bin(x[i]);
|
||||
|
||||
for (k = 0; k < nstencil; k++) {
|
||||
for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
|
||||
if (i == j) continue;
|
||||
|
||||
jtype = type[j];
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >=0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if (domain->minimum_image_check(delx,dely,delz))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ilist[i] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
NPAIR_OMP_CLOSE;
|
||||
list->inum = nlocal;
|
||||
list->gnum = 0;
|
||||
}
|
||||
44
src/USER-OMP/npair_full_bin_omp.h
Normal file
44
src/USER-OMP/npair_full_bin_omp.h
Normal file
@ -0,0 +1,44 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS

// style registration: style name, implementing class, OR-ed NP_* flag mask

NPairStyle(full/bin/omp,
           NPairFullBinOmp,
           NP_FULL | NP_BIN | NP_OMP | NP_NEWTON | NP_NEWTOFF |
           NP_ORTHO | NP_TRI)

#else

#ifndef LMP_NPAIR_FULL_BIN_OMP_H
#define LMP_NPAIR_FULL_BIN_OMP_H

#include "npair.h"

namespace LAMMPS_NS {

// NPair subclass registered under the style name full/bin/omp

class NPairFullBinOmp : public NPair {
 public:
  NPairFullBinOmp(class LAMMPS *lmp);
  ~NPairFullBinOmp() {}

  // fill the given neighbor list
  void build(class NeighList *list);
};

}

#endif
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
143
src/USER-OMP/npair_full_multi_omp.cpp
Normal file
143
src/USER-OMP/npair_full_multi_omp.cpp
Normal file
@ -0,0 +1,143 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_full_multi_omp.h"
|
||||
#include "npair_omp.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "domain.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace NeighConst;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// constructor: only forwards the LAMMPS instance pointer to the NPair base

NPairFullMultiOmp::NPairFullMultiOmp(LAMMPS *lmp) :
  NPair(lmp)
{
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction for all neighbors
|
||||
multi-type stencil is itype dependent and is distance checked
|
||||
every neighbor pair appears in list of both atoms i and j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairFullMultiOmp::build(NeighList *list)
|
||||
{
|
||||
const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
|
||||
const int molecular = atom->molecular;
|
||||
const int moltemplate = (molecular == 2) ? 1 : 0;
|
||||
|
||||
NPAIR_OMP_INIT;
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) shared(list)
|
||||
#endif
|
||||
NPAIR_OMP_SETUP(nlocal);
|
||||
|
||||
int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom;
|
||||
tagint tagprev;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
|
||||
int *neighptr,*s;
|
||||
double *cutsq,*distsq;
|
||||
|
||||
// loop over each atom, storing neighbors
|
||||
|
||||
double **x = atom->x;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
tagint *tag = atom->tag;
|
||||
tagint *molecule = atom->molecule;
|
||||
tagint **special = atom->special;
|
||||
int **nspecial = atom->nspecial;
|
||||
|
||||
int *molindex = atom->molindex;
|
||||
int *molatom = atom->molatom;
|
||||
Molecule **onemols = atom->avec->onemols;
|
||||
|
||||
int *ilist = list->ilist;
|
||||
int *numneigh = list->numneigh;
|
||||
int **firstneigh = list->firstneigh;
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
for (i = ifrom; i < ito; i++) {
|
||||
|
||||
n = 0;
|
||||
neighptr = ipage.vget();
|
||||
|
||||
itype = type[i];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
if (moltemplate) {
|
||||
imol = molindex[i];
|
||||
iatom = molatom[i];
|
||||
tagprev = tag[i] - iatom - 1;
|
||||
}
|
||||
|
||||
// loop over all atoms in other bins in stencil, including self
|
||||
// skip if i,j neighbor cutoff is less than bin distance
|
||||
// skip i = j
|
||||
|
||||
ibin = coord2bin(x[i]);
|
||||
s = stencil_multi[itype];
|
||||
distsq = distsq_multi[itype];
|
||||
cutsq = cutneighsq[itype];
|
||||
ns = nstencil_multi[itype];
|
||||
for (k = 0; k < ns; k++) {
|
||||
for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
|
||||
jtype = type[j];
|
||||
if (cutsq[jtype] < distsq[k]) continue;
|
||||
if (i == j) continue;
|
||||
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq[itype][jtype]) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(special[i],nspecial[i],tag[j]);
|
||||
else if (imol >=0)
|
||||
which = find_special(onemols[imol]->special[iatom],
|
||||
onemols[imol]->nspecial[iatom],
|
||||
tag[j]-tagprev);
|
||||
else which = 0;
|
||||
if (which == 0) neighptr[n++] = j;
|
||||
else if (domain->minimum_image_check(delx,dely,delz))
|
||||
neighptr[n++] = j;
|
||||
else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
|
||||
} else neighptr[n++] = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ilist[i] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
NPAIR_OMP_CLOSE;
|
||||
list->inum = nlocal;
|
||||
list->gnum = 0;
|
||||
}
|
||||
44
src/USER-OMP/npair_full_multi_omp.h
Normal file
44
src/USER-OMP/npair_full_multi_omp.h
Normal file
@ -0,0 +1,44 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS

// style registration: style name, implementing class, OR-ed NP_* flag mask

NPairStyle(full/multi/omp,
           NPairFullMultiOmp,
           NP_FULL | NP_MULTI | NP_OMP | NP_NEWTON | NP_NEWTOFF |
           NP_ORTHO | NP_TRI)

#else

#ifndef LMP_NPAIR_FULL_MULTI_OMP_H
#define LMP_NPAIR_FULL_MULTI_OMP_H

#include "npair.h"

namespace LAMMPS_NS {

// NPair subclass registered under the style name full/multi/omp

class NPairFullMultiOmp : public NPair {
 public:
  NPairFullMultiOmp(class LAMMPS *lmp);
  ~NPairFullMultiOmp() {}

  // fill the given neighbor list
  void build(class NeighList *list);
};

}

#endif
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user