diff --git a/doc/Manual.html b/doc/Manual.html
index 391af4c8d9..76e6c1997d 100644
--- a/doc/Manual.html
+++ b/doc/Manual.html
@@ -1,7 +1,7 @@
16 Apr 2015 version
+18 Apr 2015 version
Version info:
diff --git a/doc/Manual.txt b/doc/Manual.txt
index 92ec1a286d..8daea98626 100644
--- a/doc/Manual.txt
+++ b/doc/Manual.txt
@@ -1,6 +1,6 @@
LAMMPS-ICMS Users Manual
-
+
@@ -18,7 +18,7 @@
LAMMPS-ICMS Documentation :c,h3
-16 Apr 2015 version :c,h4
+18 Apr 2015 version :c,h4
Version info: :h4
diff --git a/doc/Section_commands.html b/doc/Section_commands.html
index 08dbd1e3bc..57b4474939 100644
--- a/doc/Section_commands.html
+++ b/doc/Section_commands.html
@@ -496,7 +496,7 @@ KOKKOS, o = USER-OMP, t = OPT.
| comb (o) | comb3 | coul/cut (gko) | coul/debye (gko) |
| coul/dsf (gko) | coul/long (gko) | coul/msm | coul/streitz |
| coul/wolf (ko) | dpd (o) | dpd/tstat (o) | dsmc |
-| eam (cgkot) | eam/alloy (cgot) | eam/fs (cgot) | eim (o) |
+| eam (cgkot) | eam/alloy (cgkot) | eam/fs (cgkot) | eim (o) |
| gauss (go) | gayberne (gio) | gran/hertz/history (o) | gran/hooke (co) |
| gran/hooke/history (o) | hbond/dreiding/lj (o) | hbond/dreiding/morse (o) | kim |
| lcbop | line/lj (o) | lj/charmm/coul/charmm (cko) | lj/charmm/coul/charmm/implicit (cko) |
@@ -511,8 +511,8 @@ KOKKOS, o = USER-OMP, t = OPT.
| nm/cut (o) | nm/cut/coul/cut (o) | nm/cut/coul/long (o) | peri/eps |
| peri/lps (o) | peri/pmb (o) | peri/ves | reax |
| rebo (o) | resquared (go) | snap | soft (go) |
-| sw (cgo) | table (gko) | tersoff (co) | tersoff/mod (o) |
-| tersoff/zbl (o) | tip4p/cut (o) | tip4p/long (o) | tri/lj (o) |
+| sw (cgkio) | table (gko) | tersoff (cko) | tersoff/mod (ko) |
+| tersoff/zbl (ko) | tip4p/cut (o) | tip4p/long (o) | tri/lj (o) |
| yukawa (go) | yukawa/colloid (go) | zbl (o)
|
diff --git a/doc/Section_commands.txt b/doc/Section_commands.txt
index 2f198dfc9d..f1df3a967c 100644
--- a/doc/Section_commands.txt
+++ b/doc/Section_commands.txt
@@ -770,8 +770,8 @@ KOKKOS, o = USER-OMP, t = OPT.
"dpd/tstat (o)"_pair_dpd.html,
"dsmc"_pair_dsmc.html,
"eam (cgkot)"_pair_eam.html,
-"eam/alloy (cgot)"_pair_eam.html,
-"eam/fs (cgot)"_pair_eam.html,
+"eam/alloy (cgkot)"_pair_eam.html,
+"eam/fs (cgkot)"_pair_eam.html,
"eim (o)"_pair_eim.html,
"gauss (go)"_pair_gauss.html,
"gayberne (gio)"_pair_gayberne.html,
@@ -829,11 +829,11 @@ KOKKOS, o = USER-OMP, t = OPT.
"resquared (go)"_pair_resquared.html,
"snap"_pair_snap.html,
"soft (go)"_pair_soft.html,
-"sw (cgo)"_pair_sw.html,
+"sw (cgkio)"_pair_sw.html,
"table (gko)"_pair_table.html,
-"tersoff (co)"_pair_tersoff.html,
-"tersoff/mod (o)"_pair_tersoff_mod.html,
-"tersoff/zbl (o)"_pair_tersoff_zbl.html,
+"tersoff (cko)"_pair_tersoff.html,
+"tersoff/mod (ko)"_pair_tersoff_mod.html,
+"tersoff/zbl (ko)"_pair_tersoff_zbl.html,
"tip4p/cut (o)"_pair_coul.html,
"tip4p/long (o)"_pair_coul.html,
"tri/lj (o)"_pair_tri_lj.html,
diff --git a/doc/pair_eam.html b/doc/pair_eam.html
index 9c8ef97aa8..afc55cbbab 100644
--- a/doc/pair_eam.html
+++ b/doc/pair_eam.html
@@ -27,6 +27,8 @@
pair_style eam/alloy/gpu command
+pair_style eam/alloy/kk command
+
pair_style eam/alloy/omp command
pair_style eam/alloy/opt command
@@ -41,6 +43,8 @@
pair_style eam/fs/gpu command
+pair_style eam/fs/kk command
+
pair_style eam/fs/omp command
pair_style eam/fs/opt command
diff --git a/doc/pair_eam.txt b/doc/pair_eam.txt
index 5a90e0780b..f88d9363cb 100644
--- a/doc/pair_eam.txt
+++ b/doc/pair_eam.txt
@@ -15,6 +15,7 @@ pair_style eam/opt command :h3
pair_style eam/alloy command :h3
pair_style eam/alloy/cuda command :h3
pair_style eam/alloy/gpu command :h3
+pair_style eam/alloy/kk command :h3
pair_style eam/alloy/omp command :h3
pair_style eam/alloy/opt command :h3
pair_style eam/cd command :h3
@@ -22,6 +23,7 @@ pair_style eam/cd/omp command :h3
pair_style eam/fs command :h3
pair_style eam/fs/cuda command :h3
pair_style eam/fs/gpu command :h3
+pair_style eam/fs/kk command :h3
pair_style eam/fs/omp command :h3
pair_style eam/fs/opt command :h3
diff --git a/doc/pair_sw.html b/doc/pair_sw.html
index 70611a5328..8a9164f90b 100644
--- a/doc/pair_sw.html
+++ b/doc/pair_sw.html
@@ -17,6 +17,8 @@
pair_style sw/intel command
+pair_style sw/kk command
+
pair_style sw/omp command
Syntax:
diff --git a/doc/pair_sw.txt b/doc/pair_sw.txt
index b2f9f8a5f2..27a3c5046b 100644
--- a/doc/pair_sw.txt
+++ b/doc/pair_sw.txt
@@ -10,6 +10,7 @@ pair_style sw command :h3
pair_style sw/cuda command :h3
pair_style sw/gpu command :h3
pair_style sw/intel command :h3
+pair_style sw/kk command :h3
pair_style sw/omp command :h3
[Syntax:]
diff --git a/doc/pair_tersoff.html b/doc/pair_tersoff.html
index 53ef9ff63c..1d17fc697e 100644
--- a/doc/pair_tersoff.html
+++ b/doc/pair_tersoff.html
@@ -15,6 +15,8 @@
pair_style tersoff/cuda
+pair_style tersoff/kk
+
pair_style tersoff/omp
pair_style tersoff/table/omp command
diff --git a/doc/pair_tersoff.txt b/doc/pair_tersoff.txt
index 2274c8e769..869a1545e9 100644
--- a/doc/pair_tersoff.txt
+++ b/doc/pair_tersoff.txt
@@ -9,6 +9,7 @@
pair_style tersoff command :h3
pair_style tersoff/table command :h3
pair_style tersoff/cuda :h3
+pair_style tersoff/kk :h3
pair_style tersoff/omp :h3
pair_style tersoff/table/omp command :h3
diff --git a/doc/pair_tersoff_mod.html b/doc/pair_tersoff_mod.html
index 401773929f..2e6a5a366b 100644
--- a/doc/pair_tersoff_mod.html
+++ b/doc/pair_tersoff_mod.html
@@ -11,6 +11,8 @@
pair_style tersoff/mod command
+pair_style tersoff/mod/kk command
+
pair_style tersoff/mod/omp command
Syntax:
diff --git a/doc/pair_tersoff_mod.txt b/doc/pair_tersoff_mod.txt
index d0a9467e4a..8e87fa8e89 100644
--- a/doc/pair_tersoff_mod.txt
+++ b/doc/pair_tersoff_mod.txt
@@ -7,6 +7,7 @@
:line
pair_style tersoff/mod command :h3
+pair_style tersoff/mod/kk command :h3
pair_style tersoff/mod/omp command :h3
[Syntax:]
diff --git a/doc/pair_tersoff_zbl.html b/doc/pair_tersoff_zbl.html
index 4ddbed38b8..3374f89148 100644
--- a/doc/pair_tersoff_zbl.html
+++ b/doc/pair_tersoff_zbl.html
@@ -11,6 +11,8 @@
pair_style tersoff/zbl command
+pair_style tersoff/zbl/kk command
+
pair_style tersoff/zbl/omp command
Syntax:
diff --git a/doc/pair_tersoff_zbl.txt b/doc/pair_tersoff_zbl.txt
index 6b95b3f338..b2a8537b27 100644
--- a/doc/pair_tersoff_zbl.txt
+++ b/doc/pair_tersoff_zbl.txt
@@ -7,6 +7,7 @@
:line
pair_style tersoff/zbl command :h3
+pair_style tersoff/zbl/kk command :h3
pair_style tersoff/zbl/omp command :h3
[Syntax:]
diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh
index c5c5c6d2f7..0eea611d20 100644
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@@ -98,6 +98,10 @@ action pair_coul_wolf_kokkos.cpp
action pair_coul_wolf_kokkos.h
action pair_eam_kokkos.cpp pair_eam.cpp
action pair_eam_kokkos.h pair_eam.h
+action pair_eam_alloy_kokkos.cpp pair_eam_alloy.cpp
+action pair_eam_alloy_kokkos.h pair_eam_alloy.h
+action pair_eam_fs_kokkos.cpp pair_eam_fs.cpp
+action pair_eam_fs_kokkos.h pair_eam_fs.h
action pair_kokkos.h
action pair_lj_charmm_coul_charmm_implicit_kokkos.cpp pair_lj_charmm_coul_charmm_implicit.cpp
action pair_lj_charmm_coul_charmm_implicit_kokkos.h pair_lj_charmm_coul_charmm_implicit.h
@@ -129,8 +133,16 @@ action pair_lj_gromacs_kokkos.cpp
action pair_lj_gromacs_kokkos.h
action pair_lj_sdk_kokkos.cpp pair_lj_sdk.cpp
action pair_lj_sdk_kokkos.h pair_lj_sdk.h
+action pair_sw_kokkos.cpp pair_sw.cpp
+action pair_sw_kokkos.h pair_sw.h
action pair_table_kokkos.cpp
action pair_table_kokkos.h
+action pair_tersoff_kokkos.cpp pair_tersoff.cpp
+action pair_tersoff_kokkos.h pair_tersoff.h
+action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp
+action pair_tersoff_mod_kokkos.h pair_tersoff_mod.h
+action pair_tersoff_zbl_kokkos.cpp pair_tersoff_zbl.cpp
+action pair_tersoff_zbl_kokkos.h pair_tersoff_zbl.h
action verlet_kokkos.cpp
action verlet_kokkos.h
diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h
index 40a0991cfc..123bbd1a8c 100644
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@@ -214,6 +214,14 @@ typedef tdual_int_1d::t_dev_um t_int_1d_um;
typedef tdual_int_1d::t_dev_const_um t_int_1d_const_um;
typedef tdual_int_1d::t_dev_const_randomread t_int_1d_randomread;
+typedef Kokkos::
+ DualView tdual_int_1d_3;
+typedef tdual_int_1d_3::t_dev t_int_1d_3;
+typedef tdual_int_1d_3::t_dev_const t_int_1d_3_const;
+typedef tdual_int_1d_3::t_dev_um t_int_1d_3_um;
+typedef tdual_int_1d_3::t_dev_const_um t_int_1d_3_const_um;
+typedef tdual_int_1d_3::t_dev_const_randomread t_int_1d_3_randomread;
+
typedef Kokkos::
DualView tdual_int_2d;
typedef tdual_int_2d::t_dev t_int_2d;
@@ -440,6 +448,13 @@ typedef tdual_int_1d::t_host_um t_int_1d_um;
typedef tdual_int_1d::t_host_const_um t_int_1d_const_um;
typedef tdual_int_1d::t_host_const_randomread t_int_1d_randomread;
+typedef Kokkos::DualView tdual_int_1d_3;
+typedef tdual_int_1d_3::t_host t_int_1d_3;
+typedef tdual_int_1d_3::t_host_const t_int_1d_3_const;
+typedef tdual_int_1d_3::t_host_um t_int_1d_3_um;
+typedef tdual_int_1d_3::t_host_const_um t_int_1d_3_const_um;
+typedef tdual_int_1d_3::t_host_const_randomread t_int_1d_3_randomread;
+
typedef Kokkos::DualView tdual_int_2d;
typedef tdual_int_2d::t_host t_int_2d;
typedef tdual_int_2d::t_host_const t_int_2d_const;
diff --git a/src/KOKKOS/neigh_full_kokkos.h b/src/KOKKOS/neigh_full_kokkos.h
index a5125b901f..388d77dae1 100644
--- a/src/KOKKOS/neigh_full_kokkos.h
+++ b/src/KOKKOS/neigh_full_kokkos.h
@@ -19,17 +19,20 @@ using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
-template
+template
void NeighborKokkos::full_bin_kokkos(NeighListKokkos *list)
{
- const int nall = includegroup?atom->nfirst:atom->nlocal;
+ const int nlocal = includegroup?atom->nfirst:atom->nlocal;
+ int nall = nlocal;
+ if (GHOST)
+ nall += atom->nghost;
list->grow(nall);
NeighborKokkosExecute
data(*list,
k_cutneighsq.view(),
k_bincount.view(),
- k_bins.view(),nall,
+ k_bins.view(),nlocal,
atomKK->k_x.view(),
atomKK->k_type.view(),
atomKK->k_mask.view(),
@@ -68,6 +71,8 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos *list)
k_ex_mol_bit.sync();
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
+ if (GHOST)
+ Kokkos::deep_copy(list->d_stencilxyz,list->h_stencilxyz);
data.special_flag[0] = special_flag[0];
data.special_flag[1] = special_flag[1];
@@ -119,20 +124,25 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos *list)
const int factor = 1;
#endif
-if(newton_pair) {
- NeighborKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
-#ifdef KOKKOS_HAVE_CUDA
- Kokkos::parallel_for(config, f);
-#else
+if (GHOST && !HALF_NEIGH) {
+ NeighborKokkosBuildFunctorFullGhost f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
Kokkos::parallel_for(nall, f);
-#endif
} else {
- NeighborKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
+ if(newton_pair) {
+ NeighborKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
- Kokkos::parallel_for(config, f);
+ Kokkos::parallel_for(config, f);
#else
- Kokkos::parallel_for(nall, f);
+ Kokkos::parallel_for(nall, f);
#endif
+ } else {
+ NeighborKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
+#ifdef KOKKOS_HAVE_CUDA
+ Kokkos::parallel_for(config, f);
+#else
+ Kokkos::parallel_for(nall, f);
+#endif
+ }
}
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
@@ -146,8 +156,13 @@ if(newton_pair) {
}
}
- list->inum = nall;
- list->gnum = 0;
+ if (GHOST) {
+ list->inum = atom->nlocal;
+ list->gnum = nall - atom->nlocal;
+ } else {
+ list->inum = nall;
+ list->gnum = 0;
+ }
}
@@ -524,6 +539,120 @@ void NeighborKokkosExecute::build_ItemCuda(typename Kokkos::TeamPoli
}
#endif
+/* ---------------------------------------------------------------------- */
+
+template
+void NeighborKokkosExecute::
+ build_Item_Full_Ghost(const int &i) const
+{
+ /* if necessary, goto next page and add pages */
+ int n = 0;
+ int which = 0;
+ int moltemplate;
+ if (molecular == 2) moltemplate = 1;
+ else moltemplate = 0;
+ // get subview of neighbors of i
+
+ const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
+ const X_FLOAT xtmp = x(i, 0);
+ const X_FLOAT ytmp = x(i, 1);
+ const X_FLOAT ztmp = x(i, 2);
+ const int itype = type(i);
+
+ const int nstencil = neigh_list.nstencil;
+ const typename ArrayTypes::t_int_1d_const_um stencil
+ = neigh_list.d_stencil;
+ const typename ArrayTypes::t_int_1d_3_const_um stencilxyz
+ = neigh_list.d_stencilxyz;
+
+ // loop over all atoms in surrounding bins in stencil including self
+ // when i is a ghost atom, must check if stencil bin is out of bounds
+ // skip i = j
+ // no molecular test when i = ghost atom
+
+ if (i < nlocal) {
+ const int ibin = coord2bin(xtmp, ytmp, ztmp);
+ for (int k = 0; k < nstencil; k++) {
+ const int jbin = ibin + stencil[k];
+ for(int m = 0; m < c_bincount(jbin); m++) {
+ const int j = c_bins(jbin,m);
+ if (i == j) continue;
+
+ const int jtype = type[j];
+ if(exclude && exclusion(i,j,itype,jtype)) continue;
+
+ const X_FLOAT delx = xtmp - x(j,0);
+ const X_FLOAT dely = ytmp - x(j,1);
+ const X_FLOAT delz = ztmp - x(j,2);
+ const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+ if (rsq <= cutneighsq(itype,jtype)) {
+ if (molecular) {
+ if (!moltemplate)
+ which = find_special(i,j);
+ /* else if (imol >= 0) */
+ /* which = find_special(onemols[imol]->special[iatom], */
+ /* onemols[imol]->nspecial[iatom], */
+ /* tag[j]-tagprev); */
+ /* else which = 0; */
+ if (which == 0){
+ if(n 0) {
+ if(n= mbinx ||
+ ybin2 < 0 || ybin2 >= mbiny ||
+ zbin2 < 0 || zbin2 >= mbinz) continue;
+ const int jbin = ibin + stencil[k];
+ for(int m = 0; m < c_bincount(jbin); m++) {
+ const int j = c_bins(jbin,m);
+ if (i == j) continue;
+
+ const int jtype = type[j];
+ if(exclude && exclusion(i,j,itype,jtype)) continue;
+
+ const X_FLOAT delx = xtmp - x(j,0);
+ const X_FLOAT dely = ytmp - x(j,1);
+ const X_FLOAT delz = ztmp - x(j,2);
+ const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+ if (rsq <= cutneighsq(itype,jtype)) {
+ if(n= neigh_list.maxneighs) {
+ resize() = 1;
+
+ if(n >= new_maxneighs()) new_maxneighs() = n;
+ }
+ neigh_list.d_ilist(i) = i;
+}
+
template
void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos *list)
{
diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp
index 2730c15a2b..4a1b77f3b6 100644
--- a/src/KOKKOS/neigh_list_kokkos.cpp
+++ b/src/KOKKOS/neigh_list_kokkos.cpp
@@ -81,8 +81,8 @@ void NeighListKokkos::stencil_allocate(int smax, int style)
memory->create_kokkos(d_stencil,h_stencil,stencil,maxstencil,
"neighlist:stencil");
if (ghostflag) {
- memory->destroy(stencilxyz);
- memory->create(stencilxyz,maxstencil,3,"neighlist:stencilxyz");
+ memory->create_kokkos(d_stencilxyz,h_stencilxyz,stencilxyz,maxstencil,
+ 3,"neighlist:stencilxyz");
}
}
diff --git a/src/KOKKOS/neigh_list_kokkos.h b/src/KOKKOS/neigh_list_kokkos.h
index 87acab2408..8dc80b83b7 100644
--- a/src/KOKKOS/neigh_list_kokkos.h
+++ b/src/KOKKOS/neigh_list_kokkos.h
@@ -48,7 +48,7 @@ class AtomNeighborsConst
const int num_neighs;
KOKKOS_INLINE_FUNCTION
- AtomNeighborsConst(int* const & firstneigh, const int & _num_neighs,
+ AtomNeighborsConst(int* const & firstneigh, const int & _num_neighs,
const int & stride):
_firstneigh(firstneigh), num_neighs(_num_neighs), _stride(stride) {};
KOKKOS_INLINE_FUNCTION
@@ -75,6 +75,8 @@ public:
typename ArrayTypes::t_int_1d d_numneigh; // # of J neighs for each I
typename ArrayTypes::t_int_1d d_stencil; // # of J neighs for each I
typename ArrayTypes::t_int_1d h_stencil; // # of J neighs per I
+ typename ArrayTypes::t_int_1d_3 d_stencilxyz;
+ typename ArrayTypes::t_int_1d_3 h_stencilxyz;
NeighListKokkos(class LAMMPS *lmp):
NeighList(lmp) {_stride = 1; maxneighs = 16;};
diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp
index 0558c4b931..43c2f4e06e 100644
--- a/src/KOKKOS/neighbor_kokkos.cpp
+++ b/src/KOKKOS/neighbor_kokkos.cpp
@@ -269,18 +269,32 @@ void NeighborKokkos::choose_build(int index, NeighRequest *rq)
{
if (rq->kokkos_host != 0) {
PairPtrHost pb = NULL;
- if (rq->full) pb = &NeighborKokkos::full_bin_kokkos;
- else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos;
- pair_build_host[index] = pb;
+ if (rq->ghost) {
+ if (rq->full) pb = &NeighborKokkos::full_bin_kokkos;
+ else if (rq->half) error->one(FLERR,"Cannot (yet) request ghost atoms with Kokkos half neighbor list");
+ pair_build_host[index] = pb;
+ } else {
+ if (rq->full) pb = &NeighborKokkos::full_bin_kokkos;
+ else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos;
+ pair_build_host[index] = pb;
+ }
return;
}
if (rq->kokkos_device != 0) {
PairPtrDevice pb = NULL;
- if (rq->full) {
- if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos;
- else pb = &NeighborKokkos::full_bin_kokkos;
+ if (rq->ghost) {
+ if (rq->full) {
+ if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos;
+ else pb = &NeighborKokkos::full_bin_kokkos;
+ }
+ else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos;
+ } else {
+ if (rq->full) {
+ if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos;
+ else pb = &NeighborKokkos::full_bin_kokkos;
+ }
+ else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos;
}
- else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos;
pair_build_device[index] = pb;
return;
}
@@ -441,10 +455,14 @@ void NeighborKokkos::build_kokkos(int topoflag)
if (anyghostlist && atom->nlocal+atom->nghost > maxatom) {
maxatom = atom->nmax;
- for (i = 0; i < nglist; i++) lists[glist[i]]->grow(maxatom);
+ for (i = 0; i < nglist; i++)
+ if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
+ else init_list_grow_kokkos(glist[i]);
} else if (atom->nlocal > maxatom) {
maxatom = atom->nmax;
- for (i = 0; i < nglist; i++) lists[glist[i]]->grow(maxatom);
+ for (i = 0; i < nglist; i++)
+ if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
+ else init_list_grow_kokkos(glist[i]);
}
// extend atom bin list if necessary
diff --git a/src/KOKKOS/neighbor_kokkos.h b/src/KOKKOS/neighbor_kokkos.h
index 320a2fabf8..9264a90524 100644
--- a/src/KOKKOS/neighbor_kokkos.h
+++ b/src/KOKKOS/neighbor_kokkos.h
@@ -158,6 +158,9 @@ class NeighborKokkosExecute
KOKKOS_FUNCTION
void build_Item(const int &i) const;
+ KOKKOS_FUNCTION
+ void build_Item_Full_Ghost(const int &i) const;
+
template
KOKKOS_FUNCTION
void build_cluster_Item(const int &i) const;
@@ -203,6 +206,42 @@ class NeighborKokkosExecute
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
+ KOKKOS_INLINE_FUNCTION
+ int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
+ {
+ int ix,iy,iz;
+
+ if (x >= bboxhi[0])
+ ix = static_cast ((x-bboxhi[0])*bininvx) + nbinx;
+ else if (x >= bboxlo[0]) {
+ ix = static_cast ((x-bboxlo[0])*bininvx);
+ ix = MIN(ix,nbinx-1);
+ } else
+ ix = static_cast ((x-bboxlo[0])*bininvx) - 1;
+
+ if (y >= bboxhi[1])
+ iy = static_cast ((y-bboxhi[1])*bininvy) + nbiny;
+ else if (y >= bboxlo[1]) {
+ iy = static_cast ((y-bboxlo[1])*bininvy);
+ iy = MIN(iy,nbiny-1);
+ } else
+ iy = static_cast ((y-bboxlo[1])*bininvy) - 1;
+
+ if (z >= bboxhi[2])
+ iz = static_cast ((z-bboxhi[2])*bininvz) + nbinz;
+ else if (z >= bboxlo[2]) {
+ iz = static_cast ((z-bboxlo[2])*bininvz);
+ iz = MIN(iz,nbinz-1);
+ } else
+ iz = static_cast ((z-bboxlo[2])*bininvz) - 1;
+
+ i[0] = ix - mbinxlo;
+ i[1] = iy - mbinylo;
+ i[2] = iz - mbinzlo;
+
+ return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
+ }
+
KOKKOS_INLINE_FUNCTION
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
@@ -258,6 +297,23 @@ struct NeighborKokkosBuildFunctor {
#endif
};
+template
+struct NeighborKokkosBuildFunctorFullGhost {
+ typedef Device device_type;
+
+ const NeighborKokkosExecute c;
+ const size_t sharedsize;
+
+ NeighborKokkosBuildFunctorFullGhost(const NeighborKokkosExecute &_c,
+ const size_t _sharedsize):c(_c),
+ sharedsize(_sharedsize) {};
+
+ KOKKOS_INLINE_FUNCTION
+ void operator() (const int & i) const {
+ c.build_Item_Full_Ghost(i);
+ }
+};
+
template
struct NeighborClusterKokkosBuildFunctor {
typedef Device device_type;
@@ -358,7 +414,7 @@ class NeighborKokkos : public Neighbor {
(class NeighListKokkos *);
PairPtrDevice *pair_build_device;
- template
+ template
void full_bin_kokkos(NeighListKokkos *list);
template
void full_bin_cluster_kokkos(NeighListKokkos *list);
@@ -383,4 +439,8 @@ The number of nlocal + nghost atoms on a processor
is limited by the size of a 32-bit integer with 2 bits
removed for masking 1-2, 1-3, 1-4 neighbors.
+E: Cannot (yet) request ghost atoms with Kokkos half neighbor list
+
+This feature is not yet supported.
+
*/
diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
new file mode 100755
index 0000000000..2e56307779
--- /dev/null
+++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp
@@ -0,0 +1,1177 @@
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing authors: Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "kokkos.h"
+#include "pair_kokkos.h"
+#include "pair_eam_alloy_kokkos.h"
+#include "atom_kokkos.h"
+#include "force.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "neigh_list_kokkos.h"
+#include "neigh_request.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+
+#define MAXLINE 1024
+
+// Cannot use virtual inheritance on the GPU, so must duplicate code
+
+/* ---------------------------------------------------------------------- */
+
+template
+PairEAMAlloyKokkos::PairEAMAlloyKokkos(LAMMPS *lmp) : PairEAM(lmp)
+{
+ respa_enable = 0;
+ one_coeff = 1;
+ manybody_flag = 1;
+
+ atomKK = (AtomKokkos *) atom;
+ execution_space = ExecutionSpaceFromDevice::space;
+ datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK;
+ datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+PairEAMAlloyKokkos::~PairEAMAlloyKokkos()
+{
+ if (!copymode) {
+ memory->destroy_kokkos(k_eatom,eatom);
+ memory->destroy_kokkos(k_vatom,vatom);
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+void PairEAMAlloyKokkos::compute(int eflag_in, int vflag_in)
+{
+ eflag = eflag_in;
+ vflag = vflag_in;
+
+ if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+ if (eflag || vflag) ev_setup(eflag,vflag);
+ else evflag = vflag_fdotr = 0;
+
+ // reallocate per-atom arrays if necessary
+
+ if (eflag_atom) {
+ memory->destroy_kokkos(k_eatom,eatom);
+ memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+ d_eatom = k_eatom.d_view;
+ }
+ if (vflag_atom) {
+ memory->destroy_kokkos(k_vatom,vatom);
+ memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
+ d_vatom = k_vatom.d_view;
+ }
+
+ atomKK->sync(execution_space,datamask_read);
+ if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+ else atomKK->modified(execution_space,F_MASK);
+
+ // grow energy and fp arrays if necessary
+ // need to be atom->nmax in length
+
+ if (atom->nmax > nmax) {
+ nmax = atom->nmax;
+ k_rho = DAT::tdual_ffloat_1d("pair:rho",nmax);
+ k_fp = DAT::tdual_ffloat_1d("pair:fp",nmax);
+ d_rho = k_rho.d_view;
+ d_fp = k_fp.d_view;
+ h_rho = k_rho.h_view;
+ h_fp = k_fp.h_view;
+ }
+
+ x = atomKK->k_x.view();
+ f = atomKK->k_f.view();
+ v_rho = k_rho.view();
+ type = atomKK->k_type.view();
+ tag = atomKK->k_tag.view();
+ nlocal = atom->nlocal;
+ nall = atom->nlocal + atom->nghost;
+ newton_pair = force->newton_pair;
+
+ NeighListKokkos* k_list = static_cast*>(list);
+ d_numneigh = k_list->d_numneigh;
+ d_neighbors = k_list->d_neighbors;
+ d_ilist = k_list->d_ilist;
+ int inum = list->inum;
+
+ // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle
+
+ k_list->clean_copy();
+ copymode = 1;
+
+ // zero out density
+
+ if (newton_pair)
+ Kokkos::parallel_for(Kokkos::RangePolicy(0,nall),*this);
+ else
+ Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this);
+ DeviceType::fence();
+
+ // loop over neighbors of my atoms
+
+ EV_FLOAT ev;
+
+ // compute kernel A
+
+ if (neighflag == HALF || neighflag == HALFTHREAD) {
+
+ if (neighflag == HALF) {
+ if (newton_pair) {
+ Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this);
+ } else {
+ Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this);
+ }
+ } else if (neighflag == HALFTHREAD) {
+ if (newton_pair) {
+ Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this);
+ } else {
+ Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this);
+ }
+ }
+ DeviceType::fence();
+
+ // communicate and sum densities (on the host)
+
+ if (newton_pair) {
+ k_rho.template modify();
+ k_rho.template sync();
+ comm->reverse_comm_pair(this);
+ k_rho.template modify();
+ k_rho.template sync();
+ }
+
+ // compute kernel B
+
+ if (eflag)
+ Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev);
+ else
+ Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this);
+ DeviceType::fence();
+
+ } else if (neighflag == FULL) {
+
+ // compute kernel AB
+
+ if (eflag)
+ Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev);
+ else
+ Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this);
+ DeviceType::fence();
+ }
+
+ if (eflag) {
+ eng_vdwl += ev.evdwl;
+ ev.evdwl = 0.0;
+ }
+
+ // communicate derivative of embedding function (on the device)
+
+ comm->forward_comm_pair(this);
+
+ // compute kernel C
+
+ if (evflag) {
+ if (neighflag == HALF) {
+ if (newton_pair) {
+ Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev);
+ } else {
+ Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev);
+ }
+ } else if (neighflag == HALFTHREAD) {
+ if (newton_pair) {
+ Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev);
+ } else {
+ Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev);
+ }
+ } else if (neighflag == FULL) {
+ if (newton_pair) {
+ Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev);
+ } else {
+ Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev);
+ }
+ }
+ } else {
+ if (neighflag == HALF) {
+ if (newton_pair) {
+ Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this);
+ } else {
+ Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this);
+ }
+ } else if (neighflag == HALFTHREAD) {
+ if (newton_pair) {
+ Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this);
+ } else {
+ Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this);
+ }
+ } else if (neighflag == FULL) {
+ if (newton_pair) {
+ Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this);
+ } else {
+ Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this);
+ }
+ }
+ }
+ DeviceType::fence();
+
+ if (eflag_global) eng_vdwl += ev.evdwl;
+ if (vflag_global) {
+ virial[0] += ev.v[0];
+ virial[1] += ev.v[1];
+ virial[2] += ev.v[2];
+ virial[3] += ev.v[3];
+ virial[4] += ev.v[4];
+ virial[5] += ev.v[5];
+ }
+
+ if (vflag_fdotr) pair_virial_fdotr_compute(this);
+
+ if (eflag_atom) {
+ k_eatom.template modify();
+ k_eatom.template sync();
+ }
+
+ if (vflag_atom) {
+ k_vatom.template modify();
+ k_vatom.template sync();
+ }
+
+ copymode = 0;
+}
+
+/* ----------------------------------------------------------------------
+ init specific to this pair style
+------------------------------------------------------------------------- */
+
+template
+void PairEAMAlloyKokkos::init_style()
+{
+ // convert read-in file(s) to arrays and spline them
+
+ PairEAM::init_style();
+
+ // irequest = neigh request made by parent class
+
+ neighflag = lmp->kokkos->neighflag;
+ int irequest = neighbor->nrequest - 1;
+
+ neighbor->requests[irequest]->
+ kokkos_host = Kokkos::Impl::is_same::value &&
+ !Kokkos::Impl::is_same::value;
+ neighbor->requests[irequest]->
+ kokkos_device = Kokkos::Impl::is_same::value;
+
+ if (neighflag == FULL) {
+ neighbor->requests[irequest]->full = 1;
+ neighbor->requests[irequest]->half = 0;
+ neighbor->requests[irequest]->full_cluster = 0;
+ } else if (neighflag == HALF || neighflag == HALFTHREAD) {
+ neighbor->requests[irequest]->full = 0;
+ neighbor->requests[irequest]->half = 1;
+ neighbor->requests[irequest]->full_cluster = 0;
+ } else {
+ error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/alloy");
+ }
+
+}
+
+template
+void PairEAMAlloyKokkos::file2array()
+{
+ file2array_alloy();
+
+ int i,j;
+ int n = atom->ntypes;
+
+ DAT::tdual_int_1d k_type2frho = DAT::tdual_int_1d("pair:type2frho",n+1);
+ DAT::tdual_int_2d k_type2rhor = DAT::tdual_int_2d("pair:type2rhor",n+1,n+1);
+ DAT::tdual_int_2d k_type2z2r = DAT::tdual_int_2d("pair:type2z2r",n+1,n+1);
+
+ HAT::t_int_1d h_type2frho = k_type2frho.h_view;
+ HAT::t_int_2d h_type2rhor = k_type2rhor.h_view;
+ HAT::t_int_2d h_type2z2r = k_type2z2r.h_view;
+
+ for (i = 1; i <= n; i++) {
+ h_type2frho[i] = type2frho[i];
+ for (j = 1; j <= n; j++) {
+ h_type2rhor(i,j) = type2rhor[i][j];
+ h_type2z2r(i,j) = type2z2r[i][j];
+ }
+ }
+ k_type2frho.template modify();
+ k_type2frho.template sync();
+ k_type2rhor.template modify();
+ k_type2rhor.template sync();
+ k_type2z2r.template modify();
+ k_type2z2r.template sync();
+
+ d_type2frho = k_type2frho.d_view;
+ d_type2rhor = k_type2rhor.d_view;
+ d_type2z2r = k_type2z2r.d_view;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+void PairEAMAlloyKokkos::array2spline()
+{
+ rdr = 1.0/dr;
+ rdrho = 1.0/drho;
+
+ tdual_ffloat_2d_n7 k_frho_spline = tdual_ffloat_2d_n7("pair:frho",nfrho,nrho+1);
+ tdual_ffloat_2d_n7 k_rhor_spline = tdual_ffloat_2d_n7("pair:rhor",nrhor,nr+1);
+ tdual_ffloat_2d_n7 k_z2r_spline = tdual_ffloat_2d_n7("pair:z2r",nz2r,nr+1);
+
+ t_host_ffloat_2d_n7 h_frho_spline = k_frho_spline.h_view;
+ t_host_ffloat_2d_n7 h_rhor_spline = k_rhor_spline.h_view;
+ t_host_ffloat_2d_n7 h_z2r_spline = k_z2r_spline.h_view;
+
+ for (int i = 0; i < nfrho; i++)
+ interpolate(nrho,drho,frho[i],h_frho_spline,i);
+ k_frho_spline.template modify();
+ k_frho_spline.template sync();
+
+ for (int i = 0; i < nrhor; i++)
+ interpolate(nr,dr,rhor[i],h_rhor_spline,i);
+ k_rhor_spline.template modify();
+ k_rhor_spline.template sync();
+
+ for (int i = 0; i < nz2r; i++)
+ interpolate(nr,dr,z2r[i],h_z2r_spline,i);
+ k_z2r_spline.template modify();
+ k_z2r_spline.template sync();
+
+ d_frho_spline = k_frho_spline.d_view;
+ d_rhor_spline = k_rhor_spline.d_view;
+ d_z2r_spline = k_z2r_spline.d_view;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+void PairEAMAlloyKokkos::interpolate(int n, double delta, double *f, t_host_ffloat_2d_n7 h_spline, int i)
+{
+ for (int m = 1; m <= n; m++) h_spline(i,m,6) = f[m];
+
+ h_spline(i,1,5) = h_spline(i,2,6) - h_spline(i,1,6);
+ h_spline(i,2,5) = 0.5 * (h_spline(i,3,6)-h_spline(i,1,6));
+ h_spline(i,n-1,5) = 0.5 * (h_spline(i,n,6)-h_spline(i,n-2,6));
+ h_spline(i,n,5) = h_spline(i,n,6) - h_spline(i,n-1,6);
+
+ for (int m = 3; m <= n-2; m++)
+ h_spline(i,m,5) = ((h_spline(i,m-2,6)-h_spline(i,m+2,6)) +
+ 8.0*(h_spline(i,m+1,6)-h_spline(i,m-1,6))) / 12.0;
+
+ for (int m = 1; m <= n-1; m++) {
+ h_spline(i,m,4) = 3.0*(h_spline(i,m+1,6)-h_spline(i,m,6)) -
+ 2.0*h_spline(i,m,5) - h_spline(i,m+1,5);
+ h_spline(i,m,3) = h_spline(i,m,5) + h_spline(i,m+1,5) -
+ 2.0*(h_spline(i,m+1,6)-h_spline(i,m,6));
+ }
+
+ h_spline(i,n,4) = 0.0;
+ h_spline(i,n,3) = 0.0;
+
+ for (int m = 1; m <= n; m++) {
+ h_spline(i,m,2) = h_spline(i,m,5)/delta;
+ h_spline(i,m,1) = 2.0*h_spline(i,m,4)/delta;
+ h_spline(i,m,0) = 3.0*h_spline(i,m,3)/delta;
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+int PairEAMAlloyKokkos::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf,
+ int pbc_flag, int *pbc)
+{
+ d_sendlist = k_sendlist.view();
+ iswap = iswap_in;
+ v_buf = buf.view();
+ Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this);
+ DeviceType::fence();
+ return n;
+}
+
+template
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyPackForwardComm, const int &i) const {
+ int j = d_sendlist(iswap, i);
+ v_buf[i] = d_fp[j];
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+void PairEAMAlloyKokkos::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf)
+{
+ first = first_in;
+ v_buf = buf.view();
+ Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this);
+ DeviceType::fence();
+}
+
+template
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyUnpackForwardComm, const int &i) const {
+ d_fp[i + first] = v_buf[i];
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+int PairEAMAlloyKokkos::pack_forward_comm(int n, int *list, double *buf,
+ int pbc_flag, int *pbc)
+{
+ int i,j;
+
+ for (i = 0; i < n; i++) {
+ j = list[i];
+ buf[i] = h_fp[j];
+ }
+ return n;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+void PairEAMAlloyKokkos::unpack_forward_comm(int n, int first, double *buf)
+{
+ for (int i = 0; i < n; i++) {
+ h_fp[i + first] = buf[i];
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+int PairEAMAlloyKokkos::pack_reverse_comm(int n, int first, double *buf)
+{
+ int i,m,last;
+
+ m = 0;
+ last = first + n;
+ for (i = first; i < last; i++) buf[m++] = h_rho[i];
+ return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+void PairEAMAlloyKokkos::unpack_reverse_comm(int n, int *list, double *buf)
+{
+ int i,j,m;
+
+ m = 0;
+ for (i = 0; i < n; i++) {
+ j = list[i];
+ h_rho[j] += buf[m++];
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyInitialize, const int &i) const {
+ d_rho[i] = 0.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+////Specialisation for Neighborlist types Half, HalfThread, Full
+template
+template
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelA, const int &ii) const {
+
+ // rho = density at each atom
+ // loop over neighbors of my atoms
+
+ // The rho array is atomic for Half/Thread neighbor style
+ Kokkos::View::value> > rho = v_rho;
+
+ const int i = d_ilist[ii];
+ const X_FLOAT xtmp = x(i,0);
+ const X_FLOAT ytmp = x(i,1);
+ const X_FLOAT ztmp = x(i,2);
+ const int itype = type(i);
+
+ //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
+ const int jnum = d_numneigh[i];
+
+ F_FLOAT rhotmp = 0.0;
+
+ for (int jj = 0; jj < jnum; jj++) {
+ //int j = d_neighbors_i[jj];
+ int j = d_neighbors(i,jj);
+ j &= NEIGHMASK;
+ const X_FLOAT delx = xtmp - x(j,0);
+ const X_FLOAT dely = ytmp - x(j,1);
+ const X_FLOAT delz = ztmp - x(j,2);
+ const int jtype = type(j);
+ const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+ if (rsq < cutforcesq) {
+ F_FLOAT p = sqrt(rsq)*rdr + 1.0;
+ int m = static_cast (p);
+ m = MIN(m,nr-1);
+ p -= m;
+ p = MIN(p,1.0);
+ const int d_type2rhor_ji = d_type2rhor(jtype,itype);
+ rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p +
+ d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6);
+ if (NEWTON_PAIR || j < nlocal) {
+ const int d_type2rhor_ij = d_type2rhor(itype,jtype);
+ rho[j] += ((d_rhor_spline(d_type2rhor_ij,m,3)*p + d_rhor_spline(d_type2rhor_ij,m,4))*p +
+ d_rhor_spline(d_type2rhor_ij,m,5))*p + d_rhor_spline(d_type2rhor_ij,m,6);
+ }
+ }
+
+ }
+ rho[i] += rhotmp;
+}
+
+/* ---------------------------------------------------------------------- */
+
+////Specialisation for Neighborlist types Half, HalfThread, Full
+template
+template
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelB, const int &ii, EV_FLOAT& ev) const {
+
+ // fp = derivative of embedding energy at each atom
+ // phi = embedding energy at each atom
+ // if rho > rhomax (e.g. due to close approach of two atoms),
+ // will exceed table, so add linear term to conserve energy
+
+ const int i = d_ilist[ii];
+ const int itype = type(i);
+
+ F_FLOAT p = d_rho[i]*rdrho + 1.0;
+ int m = static_cast (p);
+ m = MAX(1,MIN(m,nrho-1));
+ p -= m;
+ p = MIN(p,1.0);
+ const int d_type2frho_i = d_type2frho[itype];
+ d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2);
+ if (EFLAG) {
+ F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p +
+ d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6);
+ if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax);
+ if (eflag_global) ev.evdwl += phi;
+ if (eflag_atom) d_eatom[i] += phi;
+ }
+
+}
+
+template
+template
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelB, const int &ii) const {
+ EV_FLOAT ev;
+ this->template operator()(TagPairEAMAlloyKernelB(), ii, ev);
+}
+
+/* ---------------------------------------------------------------------- */
+
+////Specialisation for Neighborlist types Half, HalfThread, Full
+template
+template
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelAB, const int &ii, EV_FLOAT& ev) const {
+
+ // rho = density at each atom
+ // loop over neighbors of my atoms
+
+ const int i = d_ilist[ii];
+ const X_FLOAT xtmp = x(i,0);
+ const X_FLOAT ytmp = x(i,1);
+ const X_FLOAT ztmp = x(i,2);
+ const int itype = type(i);
+
+ //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
+ const int jnum = d_numneigh[i];
+
+ F_FLOAT rhotmp = 0.0;
+
+ for (int jj = 0; jj < jnum; jj++) {
+ //int j = d_neighbors_i[jj];
+ int j = d_neighbors(i,jj);
+ j &= NEIGHMASK;
+ const X_FLOAT delx = xtmp - x(j,0);
+ const X_FLOAT dely = ytmp - x(j,1);
+ const X_FLOAT delz = ztmp - x(j,2);
+ const int jtype = type(j);
+ const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+ if (rsq < cutforcesq) {
+ F_FLOAT p = sqrt(rsq)*rdr + 1.0;
+ int m = static_cast (p);
+ m = MIN(m,nr-1);
+ p -= m;
+ p = MIN(p,1.0);
+ const int d_type2rhor_ji = d_type2rhor(jtype,itype);
+ rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p +
+ d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6);
+ }
+
+ }
+ d_rho[i] += rhotmp;
+
+ // fp = derivative of embedding energy at each atom
+ // phi = embedding energy at each atom
+ // if rho > rhomax (e.g. due to close approach of two atoms),
+ // will exceed table, so add linear term to conserve energy
+
+ F_FLOAT p = d_rho[i]*rdrho + 1.0;
+ int m = static_cast (p);
+ m = MAX(1,MIN(m,nrho-1));
+ p -= m;
+ p = MIN(p,1.0);
+ const int d_type2frho_i = d_type2frho[itype];
+ d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2);
+ if (EFLAG) {
+ F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p +
+ d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6);
+ if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax);
+ if (eflag_global) ev.evdwl += phi;
+ if (eflag_atom) d_eatom[i] += phi;
+ }
+
+}
+
+template
+template
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelAB, const int &ii) const {
+ EV_FLOAT ev;
+ this->template operator()(TagPairEAMAlloyKernelAB(), ii, ev);
+}
+
+/* ---------------------------------------------------------------------- */
+
+////Specialisation for Neighborlist types Half, HalfThread, Full
+template
+template
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelC, const int &ii, EV_FLOAT& ev) const {
+
+ // The f array is atomic for Half/Thread neighbor style
+ Kokkos::View::value> > a_f = f;
+
+ const int i = d_ilist[ii];
+ const X_FLOAT xtmp = x(i,0);
+ const X_FLOAT ytmp = x(i,1);
+ const X_FLOAT ztmp = x(i,2);
+ const int itype = type(i);
+
+ //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i);
+ const int jnum = d_numneigh[i];
+
+ F_FLOAT fxtmp = 0.0;
+ F_FLOAT fytmp = 0.0;
+ F_FLOAT fztmp = 0.0;
+
+ for (int jj = 0; jj < jnum; jj++) {
+ //int j = d_neighbors_i[jj];
+ int j = d_neighbors(i,jj);
+ j &= NEIGHMASK;
+ const X_FLOAT delx = xtmp - x(j,0);
+ const X_FLOAT dely = ytmp - x(j,1);
+ const X_FLOAT delz = ztmp - x(j,2);
+ const int jtype = type(j);
+ const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+
+ if(rsq < cutforcesq) {
+ const F_FLOAT r = sqrt(rsq);
+ F_FLOAT p = r*rdr + 1.0;
+ int m = static_cast (p);
+ m = MIN(m,nr-1);
+ p -= m;
+ p = MIN(p,1.0);
+
+ // rhoip = derivative of (density at atom j due to atom i)
+ // rhojp = derivative of (density at atom i due to atom j)
+ // phi = pair potential energy
+ // phip = phi'
+ // z2 = phi * r
+ // z2p = (phi * r)' = (phi' r) + phi
+ // psip needs both fp[i] and fp[j] terms since r_ij appears in two
+ // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji)
+ // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip
+
+ const int d_type2rhor_ij = d_type2rhor(itype,jtype);
+ const F_FLOAT rhoip = (d_rhor_spline(d_type2rhor_ij,m,0)*p + d_rhor_spline(d_type2rhor_ij,m,1))*p +
+ d_rhor_spline(d_type2rhor_ij,m,2);
+ const int d_type2rhor_ji = d_type2rhor(jtype,itype);
+ const F_FLOAT rhojp = (d_rhor_spline(d_type2rhor_ji,m,0)*p + d_rhor_spline(d_type2rhor_ji,m,1))*p +
+ d_rhor_spline(d_type2rhor_ji,m,2);
+ const int d_type2z2r_ij = d_type2z2r(itype,jtype);
+ const F_FLOAT z2p = (d_z2r_spline(d_type2z2r_ij,m,0)*p + d_z2r_spline(d_type2z2r_ij,m,1))*p +
+ d_z2r_spline(d_type2z2r_ij,m,2);
+ const F_FLOAT z2 = ((d_z2r_spline(d_type2z2r_ij,m,3)*p + d_z2r_spline(d_type2z2r_ij,m,4))*p +
+ d_z2r_spline(d_type2z2r_ij,m,5))*p + d_z2r_spline(d_type2z2r_ij,m,6);
+
+ const F_FLOAT recip = 1.0/r;
+ const F_FLOAT phi = z2*recip;
+ const F_FLOAT phip = z2p*recip - phi*recip;
+ const F_FLOAT psip = d_fp[i]*rhojp + d_fp[j]*rhoip + phip;
+ const F_FLOAT fpair = -psip*recip;
+
+ fxtmp += delx*fpair;
+ fytmp += dely*fpair;
+ fztmp += delz*fpair;
+
+ if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) {
+ a_f(j,0) -= delx*fpair;
+ a_f(j,1) -= dely*fpair;
+ a_f(j,2) -= delz*fpair;
+ }
+
+ if (EVFLAG) {
+ if (eflag) {
+ ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,phi,fpair,delx,dely,delz);
+ }
+
+ }
+ }
+
+ a_f(i,0) += fxtmp;
+ a_f(i,1) += fytmp;
+ a_f(i,2) += fztmp;
+}
+
+template
+template
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelC, const int &ii) const {
+ EV_FLOAT ev;
+ this->template operator()(TagPairEAMAlloyKernelC(), ii, ev);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+template
+KOKKOS_INLINE_FUNCTION
+void PairEAMAlloyKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+ const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+ const F_FLOAT &dely, const F_FLOAT &delz) const
+{
+ const int EFLAG = eflag;
+ const int VFLAG = vflag_either;
+
+ // The eatom and vatom arrays are atomic for Half/Thread neighbor style
+ Kokkos::View::value> > v_eatom = k_eatom.view();
+ Kokkos::View::value> > v_vatom = k_vatom.view();
+
+ if (EFLAG) {
+ if (eflag_atom) {
+ const E_FLOAT epairhalf = 0.5 * epair;
+ if (NEIGHFLAG!=FULL) {
+ if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf;
+ if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf;
+ } else {
+ v_eatom[i] += epairhalf;
+ }
+ }
+ }
+
+ if (VFLAG) {
+ const E_FLOAT v0 = delx*delx*fpair;
+ const E_FLOAT v1 = dely*dely*fpair;
+ const E_FLOAT v2 = delz*delz*fpair;
+ const E_FLOAT v3 = delx*dely*fpair;
+ const E_FLOAT v4 = delx*delz*fpair;
+ const E_FLOAT v5 = dely*delz*fpair;
+
+ if (vflag_global) {
+ if (NEIGHFLAG!=FULL) {
+ if (NEWTON_PAIR || i < nlocal) {
+ ev.v[0] += 0.5*v0;
+ ev.v[1] += 0.5*v1;
+ ev.v[2] += 0.5*v2;
+ ev.v[3] += 0.5*v3;
+ ev.v[4] += 0.5*v4;
+ ev.v[5] += 0.5*v5;
+ }
+ if (NEWTON_PAIR || j < nlocal) {
+ ev.v[0] += 0.5*v0;
+ ev.v[1] += 0.5*v1;
+ ev.v[2] += 0.5*v2;
+ ev.v[3] += 0.5*v3;
+ ev.v[4] += 0.5*v4;
+ ev.v[5] += 0.5*v5;
+ }
+ } else {
+ ev.v[0] += 0.5*v0;
+ ev.v[1] += 0.5*v1;
+ ev.v[2] += 0.5*v2;
+ ev.v[3] += 0.5*v3;
+ ev.v[4] += 0.5*v4;
+ ev.v[5] += 0.5*v5;
+ }
+ }
+
+ if (vflag_atom) {
+ if (NEIGHFLAG!=FULL) {
+ if (NEWTON_PAIR || i < nlocal) {
+ v_vatom(i,0) += 0.5*v0;
+ v_vatom(i,1) += 0.5*v1;
+ v_vatom(i,2) += 0.5*v2;
+ v_vatom(i,3) += 0.5*v3;
+ v_vatom(i,4) += 0.5*v4;
+ v_vatom(i,5) += 0.5*v5;
+ }
+ if (NEWTON_PAIR || j < nlocal) {
+ v_vatom(j,0) += 0.5*v0;
+ v_vatom(j,1) += 0.5*v1;
+ v_vatom(j,2) += 0.5*v2;
+ v_vatom(j,3) += 0.5*v3;
+ v_vatom(j,4) += 0.5*v4;
+ v_vatom(j,5) += 0.5*v5;
+ }
+ } else {
+ v_vatom(i,0) += 0.5*v0;
+ v_vatom(i,1) += 0.5*v1;
+ v_vatom(i,2) += 0.5*v2;
+ v_vatom(i,3) += 0.5*v3;
+ v_vatom(i,4) += 0.5*v4;
+ v_vatom(i,5) += 0.5*v5;
+ }
+ }
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+// Duplicate PairEAMAlloy functions
+
+/* ----------------------------------------------------------------------
+ set coeffs for one or more type pairs
+ read DYNAMO setfl file
+------------------------------------------------------------------------- */
+
+template
+void PairEAMAlloyKokkos::coeff(int narg, char **arg)
+{
+ int i,j;
+
+ if (!allocated) allocate();
+
+ if (narg != 3 + atom->ntypes)
+ error->all(FLERR,"Incorrect args for pair coefficients");
+
+ // insure I,J args are * *
+
+ if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
+ error->all(FLERR,"Incorrect args for pair coefficients");
+
+ // read EAM setfl file
+
+ if (setfl) {
+ for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i];
+ delete [] setfl->elements;
+ delete [] setfl->mass;
+ memory->destroy(setfl->frho);
+ memory->destroy(setfl->rhor);
+ memory->destroy(setfl->z2r);
+ delete setfl;
+ }
+ setfl = new Setfl();
+ read_file(arg[2]);
+
+ // read args that map atom types to elements in potential file
+ // map[i] = which element the Ith atom type is, -1 if NULL
+
+ for (i = 3; i < narg; i++) {
+ if (strcmp(arg[i],"NULL") == 0) {
+ map[i-2] = -1;
+ continue;
+ }
+ for (j = 0; j < setfl->nelements; j++)
+ if (strcmp(arg[i],setfl->elements[j]) == 0) break;
+ if (j < setfl->nelements) map[i-2] = j;
+ else error->all(FLERR,"No matching element in EAM potential file");
+ }
+
+ // clear setflag since coeff() called once with I,J = * *
+
+ int n = atom->ntypes;
+ for (i = 1; i <= n; i++)
+ for (j = i; j <= n; j++)
+ setflag[i][j] = 0;
+
+ // set setflag i,j for type pairs where both are mapped to elements
+ // set mass of atom type if i = j
+
+ int count = 0;
+ for (i = 1; i <= n; i++) {
+ for (j = i; j <= n; j++) {
+ if (map[i] >= 0 && map[j] >= 0) {
+ setflag[i][j] = 1;
+ if (i == j) atom->set_mass(i,setfl->mass[map[i]]);
+ count++;
+ }
+ }
+ }
+
+ if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
+}
+
+/* ----------------------------------------------------------------------
+ read a multi-element DYNAMO setfl file
+------------------------------------------------------------------------- */
+
+template
+void PairEAMAlloyKokkos::read_file(char *filename)
+{
+ Setfl *file = setfl;
+
+ // open potential file
+
+ int me = comm->me;
+ FILE *fptr;
+ char line[MAXLINE];
+
+ if (me == 0) {
+ fptr = force->open_potential(filename);
+ if (fptr == NULL) {
+ char str[128];
+ sprintf(str,"Cannot open EAM potential file %s",filename);
+ error->one(FLERR,str);
+ }
+ }
+
+ // read and broadcast header
+ // extract element names from nelements line
+
+ int n;
+ if (me == 0) {
+ fgets(line,MAXLINE,fptr);
+ fgets(line,MAXLINE,fptr);
+ fgets(line,MAXLINE,fptr);
+ fgets(line,MAXLINE,fptr);
+ n = strlen(line) + 1;
+ }
+ MPI_Bcast(&n,1,MPI_INT,0,world);
+ MPI_Bcast(line,n,MPI_CHAR,0,world);
+
+ sscanf(line,"%d",&file->nelements);
+ int nwords = atom->count_words(line);
+ if (nwords != file->nelements + 1)
+ error->all(FLERR,"Incorrect element names in EAM potential file");
+
+ char **words = new char*[file->nelements+1];
+ nwords = 0;
+ strtok(line," \t\n\r\f");
+ while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
+
+ file->elements = new char*[file->nelements];
+ for (int i = 0; i < file->nelements; i++) {
+ n = strlen(words[i]) + 1;
+ file->elements[i] = new char[n];
+ strcpy(file->elements[i],words[i]);
+ }
+ delete [] words;
+
+ if (me == 0) {
+ fgets(line,MAXLINE,fptr);
+ sscanf(line,"%d %lg %d %lg %lg",
+ &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
+ }
+
+ MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
+ MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
+ MPI_Bcast(&file->nr,1,MPI_INT,0,world);
+ MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
+ MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
+
+ file->mass = new double[file->nelements];
+ memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho");
+ memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor");
+ memory->create(file->z2r,file->nelements,file->nelements,file->nr+1,
+ "pair:z2r");
+
+ int i,j,tmp;
+ for (i = 0; i < file->nelements; i++) {
+ if (me == 0) {
+ fgets(line,MAXLINE,fptr);
+ sscanf(line,"%d %lg",&tmp,&file->mass[i]);
+ }
+ MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
+
+ if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
+ MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
+ if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]);
+ MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world);
+ }
+
+ for (i = 0; i < file->nelements; i++)
+ for (j = 0; j <= i; j++) {
+ if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
+ MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
+ }
+
+ // close the potential file
+
+ if (me == 0) fclose(fptr);
+}
+
+/* ----------------------------------------------------------------------
+ copy read-in setfl potential to standard array format
+------------------------------------------------------------------------- */
+
+template
+void PairEAMAlloyKokkos::file2array_alloy()
+{
+ int i,j,m,n;
+ int ntypes = atom->ntypes;
+
+ // set function params directly from setfl file
+
+ nrho = setfl->nrho;
+ nr = setfl->nr;
+ drho = setfl->drho;
+ dr = setfl->dr;
+ rhomax = (nrho-1) * drho;
+
+ // ------------------------------------------------------------------
+ // setup frho arrays
+ // ------------------------------------------------------------------
+
+ // allocate frho arrays
+ // nfrho = # of setfl elements + 1 for zero array
+
+ nfrho = setfl->nelements + 1;
+ memory->destroy(frho);
+ memory->create(frho,nfrho,nrho+1,"pair:frho");
+
+ // copy each element's frho to global frho
+
+ for (i = 0; i < setfl->nelements; i++)
+ for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m];
+
+ // add extra frho of zeroes for non-EAM types to point to (pair hybrid)
+ // this is necessary b/c fp is still computed for non-EAM atoms
+
+ for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
+
+ // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
+ // if atom type doesn't point to element (non-EAM atom in pair hybrid)
+ // then map it to last frho array of zeroes
+
+ for (i = 1; i <= ntypes; i++)
+ if (map[i] >= 0) type2frho[i] = map[i];
+ else type2frho[i] = nfrho-1;
+
+ // ------------------------------------------------------------------
+ // setup rhor arrays
+ // ------------------------------------------------------------------
+
+ // allocate rhor arrays
+ // nrhor = # of setfl elements
+
+ nrhor = setfl->nelements;
+ memory->destroy(rhor);
+ memory->create(rhor,nrhor,nr+1,"pair:rhor");
+
+ // copy each element's rhor to global rhor
+
+ for (i = 0; i < setfl->nelements; i++)
+ for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m];
+
+ // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
+ // for setfl files, I,J mapping only depends on I
+ // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
+
+ for (i = 1; i <= ntypes; i++)
+ for (j = 1; j <= ntypes; j++)
+ type2rhor[i][j] = map[i];
+
+ // ------------------------------------------------------------------
+ // setup z2r arrays
+ // ------------------------------------------------------------------
+
+ // allocate z2r arrays
+ // nz2r = N*(N+1)/2 where N = # of setfl elements
+
+ nz2r = setfl->nelements * (setfl->nelements+1) / 2;
+ memory->destroy(z2r);
+ memory->create(z2r,nz2r,nr+1,"pair:z2r");
+
+ // copy each element pair z2r to global z2r, only for I >= J
+
+ n = 0;
+ for (i = 0; i < setfl->nelements; i++)
+ for (j = 0; j <= i; j++) {
+ for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m];
+ n++;
+ }
+
+ // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
+ // set of z2r arrays only fill lower triangular Nelement matrix
+ // value = n = sum over rows of lower-triangular matrix until reach irow,icol
+ // swap indices when irow < icol to stay lower triangular
+ // if map = -1 (non-EAM atom in pair hybrid):
+ // type2z2r is not used by non-opt
+ // but set type2z2r to 0 since accessed by opt
+
+ int irow,icol;
+ for (i = 1; i <= ntypes; i++) {
+ for (j = 1; j <= ntypes; j++) {
+ irow = map[i];
+ icol = map[j];
+ if (irow == -1 || icol == -1) {
+ type2z2r[i][j] = 0;
+ continue;
+ }
+ if (irow < icol) {
+ irow = map[j];
+ icol = map[i];
+ }
+ n = 0;
+ for (m = 0; m < irow; m++) n += m + 1;
+ n += icol;
+ type2z2r[i][j] = n;
+ }
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template class PairEAMAlloyKokkos;
+#ifdef KOKKOS_HAVE_CUDA
+template class PairEAMAlloyKokkos;
+#endif
\ No newline at end of file
diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.h b/src/KOKKOS/pair_eam_alloy_kokkos.h
new file mode 100755
index 0000000000..2d48f0fde5
--- /dev/null
+++ b/src/KOKKOS/pair_eam_alloy_kokkos.h
@@ -0,0 +1,183 @@
+/* -*- c++ -*- ----------------------------------------------------------
+
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(eam/alloy/kk,PairEAMAlloyKokkos)
+PairStyle(eam/alloy/kk/device,PairEAMAlloyKokkos)
+PairStyle(eam/alloy/kk/host,PairEAMAlloyKokkos)
+
+#else
+
+#ifndef LMP_PAIR_EAM_ALLOY_KOKKOS_H
+#define LMP_PAIR_EAM_ALLOY_KOKKOS_H
+
+#include "stdio.h"
+#include "pair_kokkos.h"
+#include "pair_eam.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+struct TagPairEAMAlloyPackForwardComm{};
+struct TagPairEAMAlloyUnpackForwardComm{};
+struct TagPairEAMAlloyInitialize{};
+
+template
+struct TagPairEAMAlloyKernelA{};
+
+template
+struct TagPairEAMAlloyKernelB{};
+
+template
+struct TagPairEAMAlloyKernelAB{};
+
+template
+struct TagPairEAMAlloyKernelC{};
+
+// Cannot use virtual inheritance on the GPU
+
+template
+class PairEAMAlloyKokkos : public PairEAM {
+ public:
+ enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
+ enum {COUL_FLAG=0};
+ typedef DeviceType device_type;
+ typedef ArrayTypes AT;
+ typedef EV_FLOAT value_type;
+
+ PairEAMAlloyKokkos(class LAMMPS *);
+ virtual ~PairEAMAlloyKokkos();
+ virtual void compute(int, int);
+ void init_style();
+ void coeff(int, char **);
+
+ KOKKOS_INLINE_FUNCTION
+ void operator()(TagPairEAMAlloyPackForwardComm, const int&) const;
+
+ KOKKOS_INLINE_FUNCTION
+ void operator()(TagPairEAMAlloyUnpackForwardComm, const int&) const;
+
+ KOKKOS_INLINE_FUNCTION
+ void operator()(TagPairEAMAlloyInitialize, const int&) const;
+
+ template
+ KOKKOS_INLINE_FUNCTION
+ void operator()(TagPairEAMAlloyKernelA, const int&) const;
+
+ template
+ KOKKOS_INLINE_FUNCTION
+ void operator()(TagPairEAMAlloyKernelB, const int&, EV_FLOAT&) const;
+
+ template
+ KOKKOS_INLINE_FUNCTION
+ void operator()(TagPairEAMAlloyKernelB, const int&) const;
+
+ template
+ KOKKOS_INLINE_FUNCTION
+ void operator()(TagPairEAMAlloyKernelAB, const int&, EV_FLOAT&) const;
+
+ template
+ KOKKOS_INLINE_FUNCTION
+ void operator()(TagPairEAMAlloyKernelAB, const int&) const;
+
+ template
+ KOKKOS_INLINE_FUNCTION
+ void operator()(TagPairEAMAlloyKernelC, const int&, EV_FLOAT&) const;
+
+ template
+ KOKKOS_INLINE_FUNCTION
+ void operator()(TagPairEAMAlloyKernelC, const int&) const;
+
+ template
+ KOKKOS_INLINE_FUNCTION
+ void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
+ const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
+ const F_FLOAT &dely, const F_FLOAT &delz) const;
+
+ virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&,
+ int, int *);
+ virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&);
+ virtual int pack_forward_comm(int, int *, double *, int, int *);
+ virtual void unpack_forward_comm(int, int, double *);
+ int pack_reverse_comm(int, int, double *);
+ void unpack_reverse_comm(int, int *, double *);
+
+ protected:
+ void cleanup_copy();
+
+ typename AT::t_x_array_randomread x;
+ typename AT::t_f_array f;
+ typename AT::t_int_1d_randomread type;
+ typename AT::t_tagint_1d tag;
+
+ DAT::tdual_efloat_1d k_eatom;
+ DAT::tdual_virial_array k_vatom;
+ DAT::t_efloat_1d d_eatom;
+ DAT::t_virial_array d_vatom;
+
+ DAT::tdual_ffloat_1d k_rho;
+ DAT::tdual_ffloat_1d k_fp;
+ DAT::t_ffloat_1d d_rho;
+ typename AT::t_ffloat_1d v_rho;
+ DAT::t_ffloat_1d d_fp;
+ HAT::t_ffloat_1d h_rho;
+ HAT::t_ffloat_1d h_fp;
+
+ DAT::t_int_1d_randomread d_type2frho;
+ DAT::t_int_2d_randomread d_type2rhor;
+ DAT::t_int_2d_randomread d_type2z2r;
+
+ typedef Kokkos::DualView tdual_ffloat_2d_n7;
+ typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread;
+ typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7;
+
+ t_ffloat_2d_n7_randomread d_frho_spline;
+ t_ffloat_2d_n7_randomread d_rhor_spline;
+ t_ffloat_2d_n7_randomread d_z2r_spline;
+
+ virtual void file2array();
+ void file2array_alloy();
+ void array2spline();
+ void interpolate(int, double, double *, t_host_ffloat_2d_n7, int);
+ void read_file(char *);
+
+ typename ArrayTypes::t_neighbors_2d d_neighbors;
+ typename ArrayTypes::t_int_1d_randomread d_ilist;
+ typename ArrayTypes::t_int_1d_randomread d_numneigh;
+ //NeighListKokkos k_list;
+
+ int iswap;
+ int first;
+ typename AT::t_int_2d d_sendlist;
+ typename AT::t_xfloat_1d_um v_buf;
+
+ int neighflag,newton_pair;
+ int nlocal,nall,eflag,vflag;
+
+ friend void pair_virial_fdotr_compute(PairEAMAlloyKokkos*);
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Cannot use chosen neighbor list style with pair eam/kk/alloy
+
+That style is not supported by Kokkos.
+
+*/
diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp
new file mode 100755
index 0000000000..a95deb8542
--- /dev/null
+++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp
@@ -0,0 +1,1186 @@
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing authors: Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "kokkos.h"
+#include "pair_kokkos.h"
+#include "pair_eam_fs_kokkos.h"
+#include "atom_kokkos.h"
+#include "force.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "neigh_list_kokkos.h"
+#include "neigh_request.h"
+#include "memory.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+
+#define MAXLINE 1024
+
+// Cannot use virtual inheritance on the GPU, so must duplicate code
+
+/* ---------------------------------------------------------------------- */
+
+template
+PairEAMFSKokkos