Porting pair gran/hooke/history to Kokkos.

This commit is contained in:
Denis Taniguchi
2018-07-03 11:10:27 -04:00
parent a52ddf8759
commit 0a5b027bac
13 changed files with 3814 additions and 89 deletions

View File

@ -63,6 +63,8 @@ PairGranHookeHistory::PairGranHookeHistory(LAMMPS *lmp) : Pair(lmp)
PairGranHookeHistory::~PairGranHookeHistory()
{
if (copymode) return;
delete [] svector;
if (fix_history) modify->delete_fix("NEIGH_HISTORY");

View File

@ -71,6 +71,8 @@ action atom_vec_kokkos.cpp
action atom_vec_kokkos.h
action atom_vec_molecular_kokkos.cpp atom_vec_molecular.cpp
action atom_vec_molecular_kokkos.h atom_vec_molecular.h
action atom_vec_sphere_kokkos.cpp atom_vec_sphere.cpp
action atom_vec_sphere_kokkos.h atom_vec_sphere.h
action bond_class2_kokkos.cpp bond_class2.cpp
action bond_class2_kokkos.h bond_class2.h
action bond_fene_kokkos.cpp bond_fene.cpp
@ -97,6 +99,8 @@ action fix_eos_table_rx_kokkos.cpp fix_eos_table_rx.cpp
action fix_eos_table_rx_kokkos.h fix_eos_table_rx.h
action fix_langevin_kokkos.cpp
action fix_langevin_kokkos.h
action fix_neigh_history_kokkos.cpp
action fix_neigh_history_kokkos.h
action fix_nh_kokkos.cpp
action fix_nh_kokkos.h
action fix_nph_kokkos.cpp
@ -191,6 +195,8 @@ action pair_eam_fs_kokkos.cpp pair_eam_fs.cpp
action pair_eam_fs_kokkos.h pair_eam_fs.h
action pair_exp6_rx_kokkos.cpp pair_exp6_rx.cpp
action pair_exp6_rx_kokkos.h pair_exp6_rx.h
action pair_gran_hooke_history_kokkos.h
action pair_gran_hooke_history_kokkos.cpp
action pair_hybrid_kokkos.cpp
action pair_hybrid_kokkos.h
action pair_hybrid_overlay_kokkos.cpp

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,155 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef ATOM_CLASS
AtomStyle(sphere/kk,AtomVecSphereKokkos)
#else
#ifndef LMP_ATOM_VEC_SPHERE_KOKKOS_H
#define LMP_ATOM_VEC_SPHERE_KOKKOS_H
#include "atom_vec_kokkos.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
class AtomVecSphereKokkos : public AtomVecKokkos {
public:
AtomVecSphereKokkos(class LAMMPS *);
~AtomVecSphereKokkos() {}
void init();
void grow(int);
void grow_reset();
void copy(int, int, int);
int pack_comm(int, int *, double *, int, int *);
int pack_comm_vel(int, int *, double *, int, int *);
int pack_comm_hybrid(int, int *, double *);
void unpack_comm(int, int, double *);
void unpack_comm_vel(int, int, double *);
int unpack_comm_hybrid(int, int, double *);
int pack_reverse(int, int, double *);
int pack_reverse_hybrid(int, int, double *);
void unpack_reverse(int, int *, double *);
int unpack_reverse_hybrid(int, int *, double *);
int pack_border(int, int *, double *, int, int *);
int pack_border_vel(int, int *, double *, int, int *);
int pack_border_hybrid(int, int *, double *);
void unpack_border(int, int, double *);
void unpack_border_vel(int, int, double *);
int unpack_border_hybrid(int, int, double *);
int pack_exchange(int, double *);
int unpack_exchange(double *);
int size_restart();
int pack_restart(int, double *);
int unpack_restart(double *);
void create_atom(int, double *);
void data_atom(double *, imageint, char **);
int data_atom_hybrid(int, char **);
void data_vel(int, char **);
int data_vel_hybrid(int, char **);
void pack_data(double **);
int pack_data_hybrid(int, double *);
void write_data(FILE *, int, double **);
int write_data_hybrid(FILE *, double *);
void pack_vel(double **);
int pack_vel_hybrid(int, double *);
void write_vel(FILE *, int, double **);
int write_vel_hybrid(FILE *, double *);
bigint memory_usage();
int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &k_sendlist,
const int & iswap,
const DAT::tdual_xfloat_2d &buf,
const int &pbc_flag, const int pbc[]);
void unpack_comm_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf);
int pack_comm_self(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
int pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
DAT::tdual_xfloat_2d buf,int iswap,
int pbc_flag, int *pbc, ExecutionSpace space);
void unpack_border_kokkos(const int &n, const int &nfirst,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space);
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space, int dim,
X_FLOAT lo, X_FLOAT hi);
int unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv,
int nlocal, int dim, X_FLOAT lo, X_FLOAT hi,
ExecutionSpace space);
void sync(ExecutionSpace space, unsigned int mask);
void modified(ExecutionSpace space, unsigned int mask);
void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
private:
tagint *tag;
int *type,*mask;
imageint *image;
double **x,**v,**f;
double *radius,*rmass;
double **omega,**torque;
int radvary;
DAT::t_tagint_1d d_tag;
HAT::t_tagint_1d h_tag;
DAT::t_imageint_1d d_image;
HAT::t_imageint_1d h_image;
DAT::t_int_1d d_type, d_mask;
HAT::t_int_1d h_type, h_mask;
DAT::t_x_array d_x;
DAT::t_v_array d_v;
DAT::t_f_array d_f;
DAT::t_float_1d d_radius;
HAT::t_float_1d h_radius;
DAT::t_float_1d d_rmass;
HAT::t_float_1d h_rmass;
DAT::t_v_array d_omega;
HAT::t_v_array h_omega;
DAT::t_f_array d_torque;
HAT::t_f_array h_torque;
DAT::tdual_int_1d k_count;
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Per-processor system is too big
The number of owned atoms plus ghost atoms on a single
processor must fit in 32-bit integer.
E: Invalid atom type in Atoms section of data file
Atom types must range from 1 to specified # of types.
E: Invalid radius in Atoms section of data file
Radius must be >= 0.0.
E: Invalid density in Atoms section of data file
Density value cannot be <= 0.0.
*/

View File

@ -0,0 +1,346 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "fix_neigh_history_kokkos.h"
#include "atom_kokkos.h"
#include "error.h"
#include "memory_kokkos.h"
#include "neigh_list_kokkos.h"
#include "pair_kokkos.h"
#include "comm.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
template <class DeviceType>
FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg, char **arg) :
FixNeighHistory(lmp, narg, arg)
{
kokkosable = 1;
atomKK = (AtomKokkos *)atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
memory->destroy(npartner);
memory->destroy(partner);
memory->destroy(valuepartner);
maxpartner = 8;
grow_arrays(atom->nmax);
d_resize = typename ArrayTypes<DeviceType>::t_int_scalar("FixNeighHistoryKokkos::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(d_resize);
#else
h_resize = d_resize;
#endif
h_resize() = 1;
}
/* ---------------------------------------------------------------------- */
template <class DeviceType>
FixNeighHistoryKokkos<DeviceType>::~FixNeighHistoryKokkos()
{
if (copymode) return;
memoryKK->destroy_kokkos(k_npartner, npartner);
memoryKK->destroy_kokkos(k_partner, partner);
memoryKK->destroy_kokkos(k_valuepartner, valuepartner);
}
/* ---------------------------------------------------------------------- */
template <class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::init()
{
if (atomKK->tag_enable == 0)
error->all(FLERR,"Neighbor history requires atoms have IDs");
}
/* ---------------------------------------------------------------------- */
template <class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::pre_exchange()
{
copymode = 1;
h_resize() = 1;
while (h_resize() > 0) {
FixNeighHistoryKokkosZeroPartnerCountFunctor<DeviceType> zero(this);
Kokkos::parallel_for(nlocal_neigh,zero);
h_resize() = 0;
deep_copy(d_resize, h_resize);
FixNeighHistoryKokkosPreExchangeFunctor<DeviceType> f(this);
Kokkos::parallel_for(nlocal_neigh,f);
deep_copy(h_resize, d_resize);
if (h_resize() > 0) {
maxpartner += 8;
memoryKK->grow_kokkos(k_partner,partner,atom->nmax,maxpartner,"neighbor_history:partner");
memoryKK->grow_kokkos(k_valuepartner,valuepartner,atom->nmax,dnum*maxpartner,"neighbor_history:valuepartner");
}
}
copymode = 0;
comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1);
}
/* ---------------------------------------------------------------------- */
template <class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::zero_partner_count_item(const int &i) const
{
d_npartner[i] = 0;
}
template <class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::pre_exchange_item(const int &ii) const
{
const int i = d_ilist[ii];
const int jnum = d_numneigh[i];
for (int jj = 0; jj < jnum; jj++) {
if (d_firstflag(i,jj)) {
int j = d_neighbors(i,jj);
j &= NEIGHMASK;
int m = Kokkos::atomic_fetch_add(&d_npartner[i],1);
if (m < maxpartner) {
d_partner(i,m) = tag[j];
for (int k = 0; k < dnum; k++)
d_valuepartner(i,dnum*m+k) = d_firstvalue(i,dnum*jj+k);
} else {
d_resize() = 1;
}
if (j < nlocal_neigh) {
m = Kokkos::atomic_fetch_add(&d_npartner[j],1);
if (m < maxpartner) {
d_partner(j,m) = tag[i];
for (int k = 0; k < dnum; k++)
d_valuepartner(j,dnum*m+k) = d_firstvalue(i,dnum*jj+k);
} else {
d_resize() = 1;
}
}
}
}
}
/* ---------------------------------------------------------------------- */
template <class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::setup_post_neighbor()
{
post_neighbor();
}
/* ---------------------------------------------------------------------- */
template <class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::post_neighbor()
{
tag = atomKK->k_tag.view<DeviceType>();
int inum = pair->list->inum;
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(pair->list);
d_numneigh = k_list->d_numneigh;
d_neighbors = k_list->d_neighbors;
d_ilist = k_list->d_ilist;
// store atom counts used for new neighbor list which was just built
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
nlocal_neigh = nlocal;
nall_neigh = nall;
// realloc firstflag and firstvalue if needed
if (maxatom < nlocal || k_list->maxneighs > d_firstflag.extent(1)) {
maxatom = nall;
d_firstflag = typename ArrayTypes<DeviceType>::t_int_2d("neighbor_history:firstflag",maxatom,k_list->maxneighs);
d_firstvalue = typename ArrayTypes<DeviceType>::t_float_2d("neighbor_history:firstvalue",maxatom,k_list->maxneighs*dnum);
}
copymode = 1;
FixNeighHistoryKokkosPostNeighborFunctor<DeviceType> f(this);
Kokkos::parallel_for(inum,f);
copymode = 0;
#ifdef LMP_KOKKOS_DEBUG
typename ArrayTypes<LMPHostType>::t_neighbors_2d h_neighbors("neighbor_history:h_neighbor",k_list->d_neighbors.extent(0), k_list->d_neighbors.extent(1));
deep_copy(h_neighbors, d_neighbors);
typename ArrayTypes<LMPHostType>::t_int_1d h_numneigh("neighbor_history:h_numneigh",k_list->d_numneigh.extent(0));
deep_copy(h_numneigh, d_numneigh);
typename ArrayTypes<LMPHostType>::t_int_2d h_firstflag("neighbor_history:h_firstflag",maxatom,k_list->maxneighs);
deep_copy(h_firstflag, d_firstflag);
typename ArrayTypes<LMPHostType>::t_float_2d h_firstvalue("neighbor_history:h_firstvalue",maxatom,k_list->maxneighs*dnum);
deep_copy(h_firstvalue, d_firstvalue);
#endif
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixNeighHistoryKokkos<DeviceType>::post_neighbor_item(const int &ii) const
{
const int i = d_ilist[ii];
const int jnum = d_numneigh[i];
const int np = d_npartner[i];
for (int jj = 0; jj < jnum; jj++) {
int j = d_neighbors(i,jj);
const int rflag = j >> SBBITS & 3;
j &= NEIGHMASK;
int m;
if (rflag) {
int jtag = tag(j);
for (m = 0; m < np; m++)
if (d_partner(i, m) == jtag) break;
if (m < np) {
d_firstflag(i,jj) = 1;
for (int k = 0; k < dnum; k++) {
d_firstvalue(i, dnum*jj+k) = d_valuepartner(i, dnum*m+k);
}
} else {
d_firstflag(i,jj) = 0;
for (int k = 0; k < dnum; k++) {
d_firstvalue(i, dnum*jj+k) = 0;
}
}
} else {
d_firstflag(i,jj) = 0;
for (int k = 0; k < dnum; k++) {
d_firstvalue(i, dnum*jj+k) = 0;
}
}
}
}
/* ----------------------------------------------------------------------
memory usage of local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
double FixNeighHistoryKokkos<DeviceType>::memory_usage()
{
double bytes = d_firstflag.extent(0)*d_firstflag.extent(1)*sizeof(int);
bytes += d_firstvalue.extent(0)*d_firstvalue.extent(1)*sizeof(double);
bytes += 2*k_npartner.extent(0)*sizeof(int);
bytes += 2*k_partner.extent(0)*k_partner.extent(1)*sizeof(int);
bytes += 2*k_valuepartner.extent(0)*k_valuepartner.extent(1)*sizeof(double);
return bytes;
}
/* ----------------------------------------------------------------------
allocate fictitious charge arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::grow_arrays(int nmax)
{
k_npartner.template sync<LMPHostType>(); // force reallocation on host
k_partner.template sync<LMPHostType>();
k_valuepartner.template sync<LMPHostType>();
memoryKK->grow_kokkos(k_npartner,npartner,nmax,"neighbor_history:npartner");
memoryKK->grow_kokkos(k_partner,partner,nmax,maxpartner,"neighbor_history:partner");
memoryKK->grow_kokkos(k_valuepartner,valuepartner,nmax,dnum*maxpartner,"neighbor_history:valuepartner");
d_npartner = k_npartner.template view<DeviceType>();
d_partner = k_partner.template view<DeviceType>();
d_valuepartner = k_valuepartner.template view<DeviceType>();
k_npartner.template modify<LMPHostType>();
k_partner.template modify<LMPHostType>();
k_valuepartner.template modify<LMPHostType>();
}
/* ----------------------------------------------------------------------
copy values within fictitious charge arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixNeighHistoryKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
{
k_npartner.template sync<LMPHostType>();
k_partner.template sync<LMPHostType>();
k_valuepartner.template sync<LMPHostType>();
npartner[j] = npartner[i];
for (int m = 0; m < npartner[i]; m++) {
partner[j][m] = partner[i][m];
valuepartner[j][m] = valuepartner[i][m];
}
k_npartner.template modify<LMPHostType>();
k_partner.template modify<LMPHostType>();
k_valuepartner.template modify<LMPHostType>();
}
/* ----------------------------------------------------------------------
pack values in local atom-based array for exchange with another proc
------------------------------------------------------------------------- */
template<class DeviceType>
int FixNeighHistoryKokkos<DeviceType>::pack_exchange(int i, double *buf)
{
k_npartner.template sync<LMPHostType>();
k_partner.template sync<LMPHostType>();
k_valuepartner.template sync<LMPHostType>();
int n = 0;
buf[n++] = npartner[i];
for (int m = 0; m < npartner[i]; m++) buf[n++] = partner[i][m];
for (int m = 0; m < dnum*npartner[i]; m++) buf[n++] = valuepartner[i][m];
return n;
}
/* ----------------------------------------------------------------------
unpack values in local atom-based array from exchange with another proc
------------------------------------------------------------------------- */
template<class DeviceType>
int FixNeighHistoryKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf)
{
int n = 0;
npartner[nlocal] = static_cast<int>(buf[n++]);
for (int m = 0; m < npartner[nlocal]; m++) partner[nlocal][m] = static_cast<int>(buf[n++]);
for (int m = 0; m < dnum*npartner[nlocal]; m++) valuepartner[nlocal][m] = buf[n++];
k_npartner.template modify<LMPHostType>();
k_partner.template modify<LMPHostType>();
k_valuepartner.template modify<LMPHostType>();
return n;
}
/* ---------------------------------------------------------------------- */
namespace LAMMPS_NS {
template class FixNeighHistoryKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class FixNeighHistoryKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,107 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
FixStyle(NEIGH_HISTORY/KK,FixNeighHistoryKokkos<LMPDeviceType>)
FixStyle(NEIGH_HISTORY/KK/DEVICE,FixNeighHistoryKokkos<LMPDeviceType>)
FixStyle(NEIGH_HISTORY/KK/HOST,FixNeighHistoryKokkos<LMPHostType>)
#else
#ifndef LMP_FIX_NEIGH_HISTORY_KOKKOS_H
#define LMP_FIX_NEIGH_HISTORY_KOKKOS_H
#include "fix_neigh_history.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
template <class DeviceType>
class FixNeighHistoryKokkos : public FixNeighHistory {
public:
FixNeighHistoryKokkos(class LAMMPS *, int, char **);
~FixNeighHistoryKokkos();
void init();
void pre_exchange();
void setup_post_neighbor();
virtual void post_neighbor();
double memory_usage();
void grow_arrays(int);
void copy_arrays(int, int, int);
int pack_exchange(int, double *);
int unpack_exchange(int, double *);
KOKKOS_INLINE_FUNCTION
void zero_partner_count_item(const int &i) const;
KOKKOS_INLINE_FUNCTION
void pre_exchange_item(const int &ii) const;
KOKKOS_INLINE_FUNCTION
void post_neighbor_item(const int &ii) const;
typename ArrayTypes<DeviceType>::t_int_2d d_firstflag;
typename ArrayTypes<DeviceType>::t_float_2d d_firstvalue;
private:
typename ArrayTypes<DeviceType>::tdual_int_1d k_npartner;
typename ArrayTypes<DeviceType>::tdual_int_2d k_partner;
typename ArrayTypes<DeviceType>::tdual_float_2d k_valuepartner;
// for neighbor list lookup
typename ArrayTypes<DeviceType>::t_neighbors_2d d_neighbors;
typename ArrayTypes<DeviceType>::t_int_1d_randomread d_ilist;
typename ArrayTypes<DeviceType>::t_int_1d_randomread d_numneigh;
typename ArrayTypes<DeviceType>::t_tagint_1d tag;
typename ArrayTypes<DeviceType>::t_int_1d d_npartner;
typename ArrayTypes<DeviceType>::t_int_2d d_partner;
typename ArrayTypes<DeviceType>::t_float_2d d_valuepartner;
typename ArrayTypes<DeviceType>::t_int_scalar d_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
};
template <class DeviceType>
struct FixNeighHistoryKokkosZeroPartnerCountFunctor {
FixNeighHistoryKokkos<DeviceType> c;
FixNeighHistoryKokkosZeroPartnerCountFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
KOKKOS_INLINE_FUNCTION
void operator()(const int &i) const {
c.zero_partner_count_item(i);
}
};
template <class DeviceType>
struct FixNeighHistoryKokkosPreExchangeFunctor {
FixNeighHistoryKokkos<DeviceType> c;
FixNeighHistoryKokkosPreExchangeFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
c.pre_exchange_item(i);
}
};
template <class DeviceType>
struct FixNeighHistoryKokkosPostNeighborFunctor {
FixNeighHistoryKokkos<DeviceType> c;
FixNeighHistoryKokkosPostNeighborFunctor(FixNeighHistoryKokkos<DeviceType> *c_ptr): c(*c_ptr) {}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
c.post_neighbor_item(i);
}
};
} // namespace LAMMPS_NS
#endif // LMP_FIX_NEIGH_HISTORY_KOKKOS_H
#endif // FIX_CLASS

View File

@ -24,8 +24,8 @@ namespace LAMMPS_NS {
/* ---------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI>
NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) {
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) {
}
@ -33,8 +33,8 @@ NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::NPairKokkos(LAMMPS *lmp) : NPair(l
copy needed info from Neighbor class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_neighbor_info()
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_neighbor_info()
{
NPair::copy_neighbor_info();
@ -63,8 +63,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_neighbor_info()
copy per-atom and per-bin vectors from NBin class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_bin_info()
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_bin_info()
{
NPair::copy_bin_info();
@ -80,8 +80,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_bin_info()
copy needed info from NStencil class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_stencil_info()
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::copy_stencil_info()
{
NPair::copy_stencil_info();
@ -110,8 +110,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::copy_stencil_info()
/* ---------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
{
NeighListKokkos<DeviceType>* list = (NeighListKokkos<DeviceType>*) list_;
const int nlocal = includegroup?atom->nfirst:atom->nlocal;
@ -131,6 +131,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
k_stencilxyz.view<DeviceType>(),
nlocal,
atomKK->k_x.view<DeviceType>(),
atomKK->k_radius.view<DeviceType>(),
atomKK->k_type.view<DeviceType>(),
atomKK->k_mask.view<DeviceType>(),
atomKK->k_molecule.view<DeviceType>(),
@ -155,7 +156,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
k_ex_mol_intra.view<DeviceType>(),
bboxhi,bboxlo,
domain->xperiodic,domain->yperiodic,domain->zperiodic,
domain->xprd_half,domain->yprd_half,domain->zprd_half);
domain->xprd_half,domain->yprd_half,domain->zprd_half,
skin);
k_cutneighsq.sync<DeviceType>();
k_ex1_type.sync<DeviceType>();
@ -171,7 +173,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
k_bincount.sync<DeviceType>();
k_bins.sync<DeviceType>();
k_atom2bin.sync<DeviceType>();
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
data.special_flag[0] = special_flag[0];
data.special_flag[1] = special_flag[1];
@ -198,25 +200,49 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI>::build(NeighList *list_)
Kokkos::parallel_for(nall, f);
} else {
if (newton_pair) {
NPairKokkosBuildFunctor<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
if (SIZE) {
NPairKokkosBuildFunctorSize<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
if (ExecutionSpaceFromDevice<DeviceType>::space == Device)
Kokkos::parallel_for(config, f);
else
Kokkos::parallel_for(nall, f);
if (ExecutionSpaceFromDevice<DeviceType>::space == Device)
Kokkos::parallel_for(config, f);
else
Kokkos::parallel_for(nall, f);
#else
Kokkos::parallel_for(nall, f);
Kokkos::parallel_for(nall, f);
#endif
} else {
NPairKokkosBuildFunctor<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
if (ExecutionSpaceFromDevice<DeviceType>::space == Device)
Kokkos::parallel_for(config, f);
else
Kokkos::parallel_for(nall, f);
#else
Kokkos::parallel_for(nall, f);
#endif
}
} else {
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
if (SIZE) {
NPairKokkosBuildFunctorSize<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
if (ExecutionSpaceFromDevice<DeviceType>::space == Device)
Kokkos::parallel_for(config, f);
else
Kokkos::parallel_for(nall, f);
if (ExecutionSpaceFromDevice<DeviceType>::space == Device)
Kokkos::parallel_for(config, f);
else
Kokkos::parallel_for(nall, f);
#else
Kokkos::parallel_for(nall, f);
Kokkos::parallel_for(nall, f);
#endif
} else {
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
if (ExecutionSpaceFromDevice<DeviceType>::space == Device)
Kokkos::parallel_for(config, f);
else
Kokkos::parallel_for(nall, f);
#else
Kokkos::parallel_for(nall, f);
#endif
}
}
}
deep_copy(data.h_resize, data.resize);
@ -774,19 +800,307 @@ void NeighborKokkosExecute<DeviceType>::
neigh_list.d_ilist(i) = i;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType> template<int HalfNeigh,int Newton,int Tri>
void NeighborKokkosExecute<DeviceType>::
build_ItemSize(const int &i) const
{
/* if necessary, goto next page and add pages */
int n = 0;
int which = 0;
// get subview of neighbors of i
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
const X_FLOAT xtmp = x(i, 0);
const X_FLOAT ytmp = x(i, 1);
const X_FLOAT ztmp = x(i, 2);
const X_FLOAT radi = radius(i);
const int itype = type(i);
const int ibin = c_atom2bin(i);
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
const int mask_history = 3 << SBBITS;
// loop over all bins in neighborhood (includes ibin)
if(HalfNeigh)
for(int m = 0; m < c_bincount(ibin); m++) {
const int j = c_bins(ibin,m);
const int jtype = type(j);
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
if((j == i) || (HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
const X_FLOAT radsum = radi + radius(j);
const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
if(rsq <= cutsq) {
if(n<neigh_list.maxneighs) {
if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
else neighbors_i(n++) = j;
}
else n++;
}
}
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
// get subview of jbin
if(HalfNeigh && (ibin==jbin)) continue;
//const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
const int jtype = type(j);
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
if (x(j,1) == ytmp) {
if (x(j,0) < xtmp) continue;
if (x(j,0) == xtmp && j <= i) continue;
}
}
}
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
const X_FLOAT radsum = radi + radius(j);
const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
if(rsq <= cutsq) {
if(n<neigh_list.maxneighs) {
if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
else neighbors_i(n++) = j;
}
else n++;
}
}
}
neigh_list.d_numneigh(i) = n;
if(n > neigh_list.maxneighs) {
resize() = 1;
if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
}
neigh_list.d_ilist(i) = i;
}
/* ---------------------------------------------------------------------- */
#ifdef KOKKOS_HAVE_CUDA
template<class DeviceType> template<int HalfNeigh,int Newton,int Tri>
__device__ inline
void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
{
/* loop over atoms in i's bin,
*/
const int atoms_per_bin = c_bins.extent(1);
const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin;
const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size();
const int MY_BIN = dev.team_rank()/atoms_per_bin;
const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;
if(ibin >=c_bincount.extent(0)) return;
X_FLOAT* other_x = sharedmem;
other_x = other_x + 6*atoms_per_bin*MY_BIN;
int* other_id = (int*) &other_x[5 * atoms_per_bin];
int bincount_current = c_bincount[ibin];
for(int kk = 0; kk < TEAMS_PER_BIN; kk++) {
const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
/* if necessary, goto next page and add pages */
int n = 0;
X_FLOAT xtmp;
X_FLOAT ytmp;
X_FLOAT ztmp;
X_FLOAT radi;
int itype;
const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0);
const int mask_history = 3 << SBBITS;
if(i >= 0) {
xtmp = x(i, 0);
ytmp = x(i, 1);
ztmp = x(i, 2);
radi = radius(i);
itype = type(i);
other_x[MY_II] = xtmp;
other_x[MY_II + atoms_per_bin] = ytmp;
other_x[MY_II + 2 * atoms_per_bin] = ztmp;
other_x[MY_II + 3 * atoms_per_bin] = itype;
other_x[MY_II + 4 * atoms_per_bin] = radi;
}
other_id[MY_II] = i;
int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);
if(test) return;
if(i >= 0 && i < nlocal) {
#pragma unroll 4
for(int m = 0; m < bincount_current; m++) {
int j = other_id[m];
const int jtype = other_x[m + 3 * atoms_per_bin];
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
if((j == i) ||
(HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton &&
((j < i) ||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
if(Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
if (x(j,1) == ytmp) {
if (x(j,0) < xtmp) continue;
if (x(j,0) == xtmp && j <= i) continue;
}
}
}
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - other_x[m];
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin];
const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
if(rsq <= cutsq) {
if(n<neigh_list.maxneighs) {
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
else neighbors_i(n++) = j;
}
else n++;
}
}
}
__syncthreads();
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
if(ibin == jbin) continue;
bincount_current = c_bincount[jbin];
int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1;
if(j >= 0) {
other_x[MY_II] = x(j, 0);
other_x[MY_II + atoms_per_bin] = x(j, 1);
other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
other_x[MY_II + 3 * atoms_per_bin] = type(j);
other_x[MY_II + 4 * atoms_per_bin] = radius(j);
}
other_id[MY_II] = j;
__syncthreads();
if(i >= 0 && i < nlocal) {
#pragma unroll 8
for(int m = 0; m < bincount_current; m++) {
const int j = other_id[m];
const int jtype = other_x[m + 3 * atoms_per_bin];
if(HalfNeigh && (j < i)) continue;
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
if (x(j,1) == ytmp) {
if (x(j,0) < xtmp) continue;
if (x(j,0) == xtmp && j <= i) continue;
}
}
}
if(exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - other_x[m];
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin];
const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
if(rsq <= cutsq) {
if(n<neigh_list.maxneighs) {
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
else neighbors_i(n++) = j;
}
else n++;
}
}
}
__syncthreads();
}
if(i >= 0 && i < nlocal) {
neigh_list.d_numneigh(i) = n;
neigh_list.d_ilist(i) = i;
}
if(n > neigh_list.maxneighs) {
resize() = 1;
if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
}
}
}
#endif
}
namespace LAMMPS_NS {
template class NPairKokkos<LMPDeviceType,0,0,0>;
template class NPairKokkos<LMPDeviceType,0,1,0>;
template class NPairKokkos<LMPDeviceType,1,0,0>;
template class NPairKokkos<LMPDeviceType,1,1,0>;
template class NPairKokkos<LMPDeviceType,1,0,1>;
template class NPairKokkos<LMPDeviceType,0,0,0,0>;
template class NPairKokkos<LMPDeviceType,0,1,0,0>;
template class NPairKokkos<LMPDeviceType,1,0,0,0>;
template class NPairKokkos<LMPDeviceType,1,1,0,0>;
template class NPairKokkos<LMPDeviceType,1,0,1,0>;
template class NPairKokkos<LMPDeviceType,1,0,0,1>;
template class NPairKokkos<LMPDeviceType,1,0,1,1>;
#ifdef KOKKOS_HAVE_CUDA
template class NPairKokkos<LMPHostType,0,0,0>;
template class NPairKokkos<LMPHostType,0,1,0>;
template class NPairKokkos<LMPHostType,1,0,0>;
template class NPairKokkos<LMPHostType,1,1,0>;
template class NPairKokkos<LMPHostType,1,0,1>;
template class NPairKokkos<LMPHostType,0,0,0,0>;
template class NPairKokkos<LMPHostType,0,1,0,0>;
template class NPairKokkos<LMPHostType,1,0,0,0>;
template class NPairKokkos<LMPHostType,1,1,0,0>;
template class NPairKokkos<LMPHostType,1,0,1,0>;
template class NPairKokkos<LMPHostType,1,0,0,1>;
template class NPairKokkos<LMPHostType,1,0,1,1>;
#endif
}

View File

@ -13,56 +13,76 @@
#ifdef NPAIR_CLASS
typedef NPairKokkos<LMPHostType,0,0,0> NPairKokkosFullBinHost;
typedef NPairKokkos<LMPHostType,0,0,0,0> NPairKokkosFullBinHost;
NPairStyle(full/bin/kk/host,
NPairKokkosFullBinHost,
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,0,0,0> NPairKokkosFullBinDevice;
typedef NPairKokkos<LMPDeviceType,0,0,0,0> NPairKokkosFullBinDevice;
NPairStyle(full/bin/kk/device,
NPairKokkosFullBinDevice,
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,0,1,0> NPairKokkosFullBinGhostHost;
typedef NPairKokkos<LMPHostType,0,1,0,0> NPairKokkosFullBinGhostHost;
NPairStyle(full/bin/ghost/kk/host,
NPairKokkosFullBinGhostHost,
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,0,1,0> NPairKokkosFullBinGhostDevice;
typedef NPairKokkos<LMPDeviceType,0,1,0,0> NPairKokkosFullBinGhostDevice;
NPairStyle(full/bin/ghost/kk/device,
NPairKokkosFullBinGhostDevice,
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,1,0,0> NPairKokkosHalfBinHost;
typedef NPairKokkos<LMPHostType,1,0,0,0> NPairKokkosHalfBinHost;
NPairStyle(half/bin/kk/host,
NPairKokkosHalfBinHost,
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO)
typedef NPairKokkos<LMPDeviceType,1,0,0> NPairKokkosHalfBinDevice;
typedef NPairKokkos<LMPDeviceType,1,0,0,0> NPairKokkosHalfBinDevice;
NPairStyle(half/bin/kk/device,
NPairKokkosHalfBinDevice,
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO)
typedef NPairKokkos<LMPHostType,1,0,1> NPairKokkosHalfBinHostTri;
typedef NPairKokkos<LMPHostType,1,0,1,0> NPairKokkosHalfBinHostTri;
NPairStyle(half/bin/kk/host,
NPairKokkosHalfBinHostTri,
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_TRI)
typedef NPairKokkos<LMPDeviceType,1,0,1> NPairKokkosHalfBinDeviceTri;
typedef NPairKokkos<LMPDeviceType,1,0,1,0> NPairKokkosHalfBinDeviceTri;
NPairStyle(half/bin/kk/device,
NPairKokkosHalfBinDeviceTri,
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_TRI)
typedef NPairKokkos<LMPHostType,1,1,0> NPairKokkosHalfBinGhostHost;
typedef NPairKokkos<LMPHostType,1,1,0,0> NPairKokkosHalfBinGhostHost;
NPairStyle(half/bin/ghost/kk/host,
NPairKokkosHalfBinGhostHost,
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,1,1,0> NPairKokkosHalfBinGhostDevice;
typedef NPairKokkos<LMPDeviceType,1,1,0,0> NPairKokkosHalfBinGhostDevice;
NPairStyle(half/bin/ghost/kk/device,
NPairKokkosHalfBinGhostDevice,
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,1,0,0,1> NPairKokkosHalfSizeBinHost;
NPairStyle(half/size/bin/kk/host,
NPairKokkosHalfSizeBinHost,
NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO)
typedef NPairKokkos<LMPDeviceType,1,0,0,1> NPairKokkosHalfSizeBinDevice;
NPairStyle(half/size/bin/kk/device,
NPairKokkosHalfSizeBinDevice,
NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO)
typedef NPairKokkos<LMPHostType,1,0,1,1> NPairKokkosHalfSizeBinHostTri;
NPairStyle(half/size/bin/kk/host,
NPairKokkosHalfSizeBinHostTri,
NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_TRI)
typedef NPairKokkos<LMPDeviceType,1,0,1,1> NPairKokkosHalfSizeBinDeviceTri;
NPairStyle(half/size/bin/kk/device,
NPairKokkosHalfSizeBinDeviceTri,
NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_TRI)
#else
#ifndef LMP_NPAIR_KOKKOS_H
@ -73,7 +93,7 @@ NPairStyle(half/bin/ghost/kk/device,
namespace LAMMPS_NS {
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI>
template<class DeviceType, int HALF_NEIGH, int GHOST, int TRI, int SIZE>
class NPairKokkos : public NPair {
public:
NPairKokkos(class LAMMPS *);
@ -162,6 +182,7 @@ class NeighborKokkosExecute
// data from Atom class
const typename AT::t_x_array_randomread x;
const typename AT::t_float_1d_randomread radius;
const typename AT::t_int_1d_const type,mask;
const typename AT::t_tagint_1d_const molecule;
const typename AT::t_tagint_1d_const tag;
@ -188,6 +209,9 @@ class NeighborKokkosExecute
const int xperiodic, yperiodic, zperiodic;
const int xprd_half, yprd_half, zprd_half;
// GRANULAR required member variables
const X_FLOAT skin;
NeighborKokkosExecute(
const NeighListKokkos<DeviceType> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
@ -199,6 +223,7 @@ class NeighborKokkosExecute
const typename AT::t_int_1d_3 &_d_stencilxyz,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_float_1d_randomread &_radius,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_tagint_1d_const &_molecule,
@ -225,13 +250,14 @@ class NeighborKokkosExecute
const typename AT::t_int_1d_const & _ex_mol_intra,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half,
const X_FLOAT _skin):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
atom2bin(_atom2bin),c_atom2bin(_atom2bin),
nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),
nlocal(_nlocal),
x(_x),type(_type),mask(_mask),molecule(_molecule),
x(_x),radius(_radius),type(_type),mask(_mask),molecule(_molecule),
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
@ -245,7 +271,8 @@ class NeighborKokkosExecute
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
ex_mol_intra(_ex_mol_intra),
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) {
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half),
skin(_skin) {
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
@ -280,10 +307,18 @@ class NeighborKokkosExecute
KOKKOS_FUNCTION
void build_Item_Ghost(const int &i) const;
template<int HalfNeigh, int Newton, int Tri>
KOKKOS_FUNCTION
void build_ItemSize(const int &i) const;
#ifdef KOKKOS_HAVE_CUDA
template<int HalfNeigh, int Newton, int Tri>
__device__ inline
void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
template<int HalfNeigh, int Newton, int Tri>
__device__ inline
void build_ItemSizeCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
#endif
KOKKOS_INLINE_FUNCTION
@ -399,6 +434,48 @@ struct NPairKokkosBuildFunctorGhost {
}
};
template <class DeviceType, int HALF_NEIGH, int GHOST_NEWTON, int TRI>
struct NPairKokkosBuildFunctorSize {
typedef DeviceType device_type;
const NeighborKokkosExecute<DeviceType> c;
const size_t sharedsize;
NPairKokkosBuildFunctorSize(const NeighborKokkosExecute<DeviceType> &_c,
const size_t _sharedsize): c(_c), sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_ItemSize<HALF_NEIGH,GHOST_NEWTON,TRI>(i);
}
#ifdef KOKKOS_HAVE_CUDA
__device__ inline
void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
c.template build_ItemSizeCuda<HALF_NEIGH,GHOST_NEWTON,TRI>(dev);
}
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
#endif
};
template <int HALF_NEIGH, int GHOST_NEWTON, int TRI>
struct NPairKokkosBuildFunctorSize<LMPHostType,HALF_NEIGH,GHOST_NEWTON,TRI> {
typedef LMPHostType device_type;
const NeighborKokkosExecute<LMPHostType> c;
const size_t sharedsize;
NPairKokkosBuildFunctorSize(const NeighborKokkosExecute<LMPHostType> &_c,
const size_t _sharedsize): c(_c), sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_ItemSize<HALF_NEIGH,GHOST_NEWTON,TRI>(i);
}
void operator() (typename Kokkos::TeamPolicy<LMPHostType>::member_type dev) const {} // Should error out
};
}
#endif

View File

@ -0,0 +1,440 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "pair_gran_hooke_history_kokkos.h"
#include "kokkos.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "memory_kokkos.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "error.h"
#include "modify.h"
#include "fix_neigh_history_kokkos.h"
#include "update.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairGranHookeHistoryKokkos<DeviceType>::PairGranHookeHistoryKokkos(LAMMPS *lmp) : PairGranHookeHistory(lmp)
{
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | V_MASK | OMEGA_MASK | F_MASK | TORQUE_MASK | TYPE_MASK | MASK_MASK | ENERGY_MASK | VIRIAL_MASK | RMASS_MASK | RADIUS_MASK;
datamask_modify = F_MASK | TORQUE_MASK | ENERGY_MASK | VIRIAL_MASK;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
PairGranHookeHistoryKokkos<DeviceType>::~PairGranHookeHistoryKokkos()
{
if (copymode) return;
if (allocated) {
memoryKK->destroy_kokkos(k_eatom,eatom);
memoryKK->destroy_kokkos(k_vatom,vatom);
eatom = NULL;
vatom = NULL;
}
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
template<class DeviceType>
void PairGranHookeHistoryKokkos<DeviceType>::init_style()
{
if (history && fix_history == NULL) {
char dnumstr[16];
sprintf(dnumstr,"%d",3);
char **fixarg = new char*[4];
fixarg[0] = (char *) "NEIGH_HISTORY";
fixarg[1] = (char *) "all";
if (execution_space == Device)
fixarg[2] = (char *) "NEIGH_HISTORY/KK/DEVICE";
else
fixarg[2] = (char *) "NEIGH_HISTORY/KK/HOST";
fixarg[3] = dnumstr;
modify->add_fix(4,fixarg,1);
delete [] fixarg;
fix_history = (FixNeighHistory *) modify->fix[modify->nfix-1];
fix_history->pair = this;
fix_historyKK = (FixNeighHistoryKokkos<DeviceType> *)fix_history;
}
PairGranHookeHistory::init_style();
// irequest = neigh request made by parent class
neighflag = lmp->kokkos->neighflag;
int irequest = neighbor->nrequest - 1;
neighbor->requests[irequest]->
kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value &&
!Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
neighbor->requests[irequest]->
kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value;
if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with gran/hooke/history/kk");
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void PairGranHookeHistoryKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
copymode = 1;
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag,0);
else evflag = vflag_fdotr = 0;
int shearupdate = 1;
if (update->setupflag) shearupdate = 0;
// reallocate per-atom arrays if necessary
if (eflag_atom) {
memoryKK->destroy_kokkos(k_eatom,eatom);
memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
d_eatom = k_eatom.view<DeviceType>();
}
if (vflag_atom) {
memoryKK->destroy_kokkos(k_vatom,vatom);
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
d_vatom = k_vatom.view<DeviceType>();
}
atomKK->sync(execution_space,datamask_read);
if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
else atomKK->modified(execution_space,F_MASK | TORQUE_MASK);
x = atomKK->k_x.view<DeviceType>();
v = atomKK->k_v.view<DeviceType>();
omega = atomKK->k_omega.view<DeviceType>();
c_x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
torque = atomKK->k_torque.view<DeviceType>();
type = atomKK->k_type.view<DeviceType>();
mask = atomKK->k_mask.view<DeviceType>();
tag = atomKK->k_tag.view<DeviceType>();
rmass = atomKK->k_rmass.view<DeviceType>();
radius = atomKK->k_radius.view<DeviceType>();
nlocal = atom->nlocal;
nall = atom->nlocal + atom->nghost;
newton_pair = force->newton_pair;
special_lj[0] = force->special_lj[0];
special_lj[1] = force->special_lj[1];
special_lj[2] = force->special_lj[2];
special_lj[3] = force->special_lj[3];
int inum = list->inum;
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
d_numneigh = k_list->d_numneigh;
d_neighbors = k_list->d_neighbors;
d_ilist = k_list->d_ilist;
d_firsttouch = fix_historyKK->d_firstflag;
d_firstshear = fix_historyKK->d_firstvalue;
EV_FLOAT ev;
if (lmp->kokkos->neighflag == HALF) {
if (force->newton_pair) {
if (evflag) {
if (shearupdate) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,1,1>>(0,inum),*this);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,1,0>>(0,inum),*this);
}
} else {
if (shearupdate) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,0,1>>(0,inum),*this);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,1,0,0>>(0,inum),*this);
}
}
} else {
if (evflag) {
if (shearupdate) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,1,1>>(0,inum),*this);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,1,0>>(0,inum),*this);
}
} else {
if (shearupdate) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,0,1>>(0,inum),*this);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALF,0,0,0>>(0,inum),*this);
}
}
}
} else { // HALFTHREAD
if (force->newton_pair) {
if (evflag) {
if (shearupdate) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,1,1>>(0,inum),*this);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,1,0>>(0,inum),*this);
}
} else {
if (shearupdate) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,0,1>>(0,inum),*this);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,1,0,0>>(0,inum),*this);
}
}
} else {
if (evflag) {
if (shearupdate) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,1,1>>(0,inum),*this);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,1,0>>(0,inum),*this);
}
} else {
if (shearupdate) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,0,1>>(0,inum),*this);
} else {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairGranHookeHistoryCompute<HALFTHREAD,0,0,0>>(0,inum),*this);
}
}
}
}
if (eflag_global) eng_vdwl += ev.evdwl;
if (vflag_global) {
virial[0] += ev.v[0];
virial[1] += ev.v[1];
virial[2] += ev.v[2];
virial[3] += ev.v[3];
virial[4] += ev.v[4];
virial[5] += ev.v[5];
}
if (eflag_atom) {
k_eatom.template modify<DeviceType>();
k_eatom.template sync<LMPHostType>();
}
if (vflag_atom) {
k_vatom.template modify<DeviceType>();
k_vatom.template sync<LMPHostType>();
}
if (vflag_fdotr) pair_virial_fdotr_compute(this);
copymode = 0;
}
template<class DeviceType>
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
KOKKOS_INLINE_FUNCTION
void PairGranHookeHistoryKokkos<DeviceType>::operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int &ii) const {
// The f and torque arrays are atomic for Half/Thread neighbor style
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_f = f;
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_torque = torque;
int i = d_ilist[ii];
double xtmp = x(i,0);
double ytmp = x(i,1);
double ztmp = x(i,2);
int itype = type[i];
double imass = rmass[i];
double irad = radius[i];
int jnum = d_numneigh[i];
double fx_i = 0.0;
double fy_i = 0.0;
double fz_i = 0.0;
double torquex_i = 0.0;
double torquey_i = 0.0;
double torquez_i = 0.0;
for (int jj = 0; jj < jnum; jj++) {
int j = d_neighbors(i,jj);
j &= NEIGHMASK;
double delx = xtmp - x(j,0);
double dely = ytmp - x(j,1);
double delz = ztmp - x(j,2);
double rsq = delx*delx + dely*dely + delz*delz;
int jtype = type[j];
double jmass = rmass[j];
double jrad = radius[j];
double radsum = irad + jrad;
// check for touching neighbors
if (rsq >= radsum * radsum) {
d_firsttouch(i,jj) = 0;
d_firstshear(i,3*jj) = 0;
d_firstshear(i,3*jj+1) = 0;
d_firstshear(i,3*jj+2) = 0;
} else {
double r = sqrt(rsq);
double rinv = 1.0/r;
double rsqinv = 1/rsq;
// relative translational velocity
double vr1 = v(i,0) - v(j,0);
double vr2 = v(i,1) - v(j,1);
double vr3 = v(i,2) - v(j,2);
// normal component
double vnnr = vr1*delx + vr2*dely + vr3*delz;
double vn1 = delx*vnnr * rsqinv;
double vn2 = dely*vnnr * rsqinv;
double vn3 = delz*vnnr * rsqinv;
// tangential component
double vt1 = vr1 - vn1;
double vt2 = vr2 - vn2;
double vt3 = vr3 - vn3;
// relative rotational velocity
double wr1 = (irad*omega(i,0) + jrad*omega(j,0)) * rinv;
double wr2 = (irad*omega(i,1) + jrad*omega(j,1)) * rinv;
double wr3 = (irad*omega(i,2) + jrad*omega(j,2)) * rinv;
double meff = imass*jmass / (imass+jmass);
if (mask[i] & freeze_group_bit) meff = jmass;
if (mask[j] & freeze_group_bit) meff = imass;
double damp = meff*gamman*vnnr*rsqinv;
double ccel = kn*(radsum-r)*rinv - damp;
// relative velocities
double vtr1 = vt1 - (delz*wr2-dely*wr3);
double vtr2 = vt2 - (delx*wr3-delz*wr1);
double vtr3 = vt3 - (dely*wr1-delx*wr2);
double vrel = vtr1*vtr1 + vtr2*vtr2 + vtr3*vtr3;
vrel = sqrt(vrel);
// shear history effects
d_firsttouch(i,jj) = 1;
double shear1 = d_firstshear(i,3*jj);
double shear2 = d_firstshear(i,3*jj+1);
double shear3 = d_firstshear(i,3*jj+2);
if (SHEARUPDATE) {
shear1 += vtr1*dt;
shear2 += vtr2*dt;
shear3 += vtr3*dt;
}
double shrmag = sqrt(shear1*shear1 + shear2*shear2 +
shear3*shear3);
// rotate shear displacements
double rsht = shear1*delx + shear2*dely + shear3*delz;
rsht *= rsqinv;
if (SHEARUPDATE) {
shear1 -= rsht*delx;
shear2 -= rsht*dely;
shear3 -= rsht*delz;
}
// tangential forces = shear + tangential velocity damping
double fs1 = - (kt*shear1 + meff*gammat*vtr1);
double fs2 = - (kt*shear2 + meff*gammat*vtr2);
double fs3 = - (kt*shear3 + meff*gammat*vtr3);
// rescale frictional displacements and forces if needed
double fs = sqrt(fs1*fs1 + fs2*fs2 + fs3*fs3);
double fn = xmu * fabs(ccel*r);
if (fs > fn) {
if (shrmag != 0.0) {
shear1 = (fn/fs) * (shear1 + meff*gammat*vtr1/kt) -
meff*gammat*vtr1/kt;
shear2 = (fn/fs) * (shear2 + meff*gammat*vtr2/kt) -
meff*gammat*vtr2/kt;
shear3 = (fn/fs) * (shear3 + meff*gammat*vtr3/kt) -
meff*gammat*vtr3/kt;
fs1 *= fn/fs;
fs2 *= fn/fs;
fs3 *= fn/fs;
} else fs1 = fs2 = fs3 = 0.0;
}
if (SHEARUPDATE) {
d_firstshear(i,3*jj) = shear1;
d_firstshear(i,3*jj+1) = shear2;
d_firstshear(i,3*jj+2) = shear3;
}
// forces & torques
double fx = delx*ccel + fs1;
double fy = dely*ccel + fs2;
double fz = delz*ccel + fs3;
fx_i += fx;
fy_i += fy;
fz_i += fz;
double tor1 = rinv * (dely*fs3 - delz*fs2);
double tor2 = rinv * (delz*fs1 - delx*fs3);
double tor3 = rinv * (delx*fs2 - dely*fs1);
torquex_i -= irad*tor1;
torquey_i -= irad*tor2;
torquez_i -= irad*tor3;
if (NEWTON_PAIR || j < nlocal) {
a_f(j,0) -= fx;
a_f(j,1) -= fy;
a_f(j,2) -= fz;
a_torque(j,0) -= jrad*tor1;
a_torque(j,1) -= jrad*tor2;
a_torque(j,2) -= jrad*tor3;
}
}
}
a_f(i,0) += fx_i;
a_f(i,1) += fy_i;
a_f(i,2) += fz_i;
a_torque(i,0) += torquex_i;
a_torque(i,1) += torquey_i;
a_torque(i,2) += torquez_i;
}
namespace LAMMPS_NS {
template class PairGranHookeHistoryKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class PairGranHookeHistoryKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,127 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(gran/hooke/history/kk,PairGranHookeHistoryKokkos<LMPDeviceType>)
PairStyle(gran/hooke/history/kk/device,PairGranHookeHistoryKokkos<LMPDeviceType>)
PairStyle(gran/hooke/history/kk/host,PairGranHookeHistoryKokkos<LMPHostType>)
#else
#ifndef LMP_PAIR_GRAN_HOOKE_HISTORY_KOKKOS_H
#define LMP_PAIR_GRAN_HOOKE_HISTORY_KOKKOS_H
#include "pair_gran_hooke_history.h"
#include "pair_kokkos.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
template <class DeviceType>
class FixNeighHistoryKokkos;
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
struct TagPairGranHookeHistoryCompute{};
template <class DeviceType>
class PairGranHookeHistoryKokkos : public PairGranHookeHistory {
public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
PairGranHookeHistoryKokkos(class LAMMPS *);
virtual ~PairGranHookeHistoryKokkos();
virtual void compute(int, int);
void init_style();
template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG, int SHEARUPDATE>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairGranHookeHistoryCompute<NEIGHFLAG,NEWTON_PAIR,EVFLAG,SHEARUPDATE>, const int&) const;
protected:
typename AT::t_x_array_randomread x;
typename AT::t_x_array c_x;
typename AT::t_v_array_randomread v;
typename AT::t_v_array_randomread omega;
typename AT::t_f_array f;
typename AT::t_f_array torque;
typename AT::t_int_1d_randomread type;
typename AT::t_int_1d_randomread mask;
typename AT::t_float_1d_randomread rmass;
typename AT::t_float_1d_randomread radius;
DAT::tdual_efloat_1d k_eatom;
DAT::tdual_virial_array k_vatom;
typename AT::t_efloat_1d d_eatom;
typename AT::t_virial_array d_vatom;
typename AT::t_tagint_1d tag;
typename AT::t_neighbors_2d d_neighbors;
typename AT::t_int_1d_randomread d_ilist;
typename AT::t_int_1d_randomread d_numneigh;
typename AT::t_int_2d d_firsttouch;
typename AT::t_float_2d d_firstshear;
int newton_pair;
double special_lj[4];
int neighflag;
int nlocal,nall,eflag,vflag;
FixNeighHistoryKokkos<DeviceType> *fix_historyKK;
friend void pair_virial_fdotr_compute<PairGranHookeHistoryKokkos>(PairGranHookeHistoryKokkos*);
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Incorrect args for pair coefficients
Self-explanatory. Check the input script or data file.
E: Pair granular requires atom attributes radius, rmass
The atom style defined does not have these attributes.
E: Pair granular requires ghost atoms store velocity
Use the comm_modify vel yes command to enable this.
E: Could not find pair fix neigh history ID
UNDOCUMENTED
U: Pair granular with shear history requires newton pair off
This is a current restriction of the implementation of pair
granular styles with history.
U: Could not find pair fix ID
A fix is created internally by the pair style to store shear
history information. You cannot delete it.
*/

View File

@ -55,6 +55,20 @@ struct ForceAdder {
/* ---------------------------------------------------------------------- */
template<class View>
struct Zero {
View v;
Zero(const View &v_):v(v_) {}
KOKKOS_INLINE_FUNCTION
void operator()(const int &i) const {
v(i,0) = 0;
v(i,1) = 0;
v(i,2) = 0;
}
};
/* ---------------------------------------------------------------------- */
VerletKokkos::VerletKokkos(LAMMPS *lmp, int narg, char **arg) :
Verlet(lmp, narg, arg)
{
@ -120,6 +134,7 @@ void VerletKokkos::setup(int flag)
atomKK->modified(Host,ALL_MASK);
neighbor->build(1);
modify->setup_post_neighbor();
neighbor->ncalls = 0;
// compute all forces
@ -175,7 +190,7 @@ void VerletKokkos::setup(int flag)
modify->setup(vflag);
output->setup(flag);
lmp->kokkos->auto_sync = 1;
update->setupflag = 1;
update->setupflag = 0;
}
/* ----------------------------------------------------------------------
@ -223,6 +238,7 @@ void VerletKokkos::setup_minimal(int flag)
atomKK->modified(Host,ALL_MASK);
neighbor->build(1);
modify->setup_post_neighbor();
neighbor->ncalls = 0;
}
@ -567,8 +583,6 @@ void VerletKokkos::run(int n)
void VerletKokkos::force_clear()
{
int i;
if (external_force_clear) return;
// clear force on all particles
@ -576,23 +590,15 @@ void VerletKokkos::force_clear()
// when using threads always clear all forces.
if (neighbor->includegroup == 0) {
int nall;
if (force->newton) nall = atomKK->nlocal + atomKK->nghost;
else nall = atomKK->nlocal;
int nall = atomKK->nlocal;
if (force->newton) nall += atomKK->nghost;
size_t nbytes = sizeof(double) * nall;
if (nbytes) {
if (atomKK->k_f.modified_host() > atomKK->k_f.modified_device()) {
memset_kokkos(atomKK->k_f.view<LMPHostType>());
atomKK->modified(Host,F_MASK);
atomKK->sync(Device,F_MASK);
} else {
memset_kokkos(atomKK->k_f.view<LMPDeviceType>());
atomKK->modified(Device,F_MASK);
}
if (torqueflag) memset(&(atomKK->torque[0][0]),0,3*nbytes);
Kokkos::parallel_for(nall, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_f.view<LMPDeviceType>()));
atomKK->modified(Device,F_MASK);
if (torqueflag) {
Kokkos::parallel_for(nall, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_torque.view<LMPDeviceType>()));
atomKK->modified(Device,TORQUE_MASK);
}
// neighbor includegroup flag is set
@ -600,35 +606,23 @@ void VerletKokkos::force_clear()
// if either newton flag is set, also include ghosts
} else {
int nall = atomKK->nfirst;
if (atomKK->k_f.modified_host() > atomKK->k_f.modified_device()) {
memset_kokkos(atomKK->k_f.view<LMPHostType>());
atomKK->modified(Host,F_MASK);
} else {
memset_kokkos(atomKK->k_f.view<LMPDeviceType>());
atomKK->modified(Device,F_MASK);
}
Kokkos::parallel_for(atomKK->nfirst, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_f.view<LMPDeviceType>()));
atomKK->modified(Device,F_MASK);
if (torqueflag) {
double **torque = atomKK->torque;
for (i = 0; i < nall; i++) {
torque[i][0] = 0.0;
torque[i][1] = 0.0;
torque[i][2] = 0.0;
}
Kokkos::parallel_for(atomKK->nfirst, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_torque.view<LMPDeviceType>()));
atomKK->modified(Device,TORQUE_MASK);
}
if (force->newton) {
nall = atomKK->nlocal + atomKK->nghost;
auto range = Kokkos::RangePolicy<LMPDeviceType>(atomKK->nlocal, atomKK->nlocal + atomKK->nghost);
Kokkos::parallel_for(range, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_f.view<LMPDeviceType>()));
atomKK->modified(Device,F_MASK);
if (torqueflag) {
double **torque = atomKK->torque;
for (i = atomKK->nlocal; i < nall; i++) {
torque[i][0] = 0.0;
torque[i][1] = 0.0;
torque[i][2] = 0.0;
}
Kokkos::parallel_for(range, Zero<typename ArrayTypes<LMPDeviceType>::t_f_array>(atomKK->k_torque.view<LMPDeviceType>()));
atomKK->modified(Device,TORQUE_MASK);
}
}
}
}

View File

@ -609,7 +609,7 @@ void CommBrick::exchange()
maxexchange = maxexchange_atom + maxexchange_fix;
bufextra = maxexchange + BUFEXTRA;
if (bufextra > bufextra_old)
memory->grow(buf_send,maxsend+bufextra,"comm:buf_send");
grow_send(maxsend+bufextra,1);
// subbox bounds for orthogonal or triclinic

View File

@ -95,6 +95,8 @@ FixNeighHistory::FixNeighHistory(LAMMPS *lmp, int narg, char **arg) :
FixNeighHistory::~FixNeighHistory()
{
if (copymode) return;
// unregister this fix so atom class doesn't invoke it any more
atom->delete_callback(id,0);