diff --git a/doc/src/package.rst b/doc/src/package.rst index 437601dc60..a037c91999 100644 --- a/doc/src/package.rst +++ b/doc/src/package.rst @@ -81,6 +81,9 @@ Syntax *neigh/thread* value = *off* or *on* off = thread only over atoms on = thread over both atoms and neighbors + *neigh/transpose* value = *off* or *on* + off = use same memory layout for GPU neigh list build as pair style + on = use transposed memory layout for GPU neigh list build *newton* = *off* or *on* off = set Newton pairwise and bonded flags off on = set Newton pairwise and bonded flags on @@ -463,6 +466,16 @@ potentials support this keyword yet, and only thread over atoms. Many simple pairwise potentials such as Lennard-Jones do support threading over both atoms and neighbors. +If the *neigh/transpose* keyword is set to *off*, then the KOKKOS +package will use the same memory layout for building the neigh list on +GPUs as used for the pair style. When this keyword is set to *on* it +will use a different (transposed) memory layout to build the neigh +list on GPUs. This can be faster in some cases (e.g. ReaxFF HNS +benchmark) but slower in others (e.g. Lennard Jones benchmark). The +copy between different memory layouts is done out of place and +therefore doubles the memory overhead of the neigh list, which can be +signicant. + The *newton* keyword sets the Newton flags for pairwise and bonded interactions to *off* or *on*, the same as the :doc:`newton ` command allows. The default for GPUs is *off* because this will almost @@ -681,15 +694,16 @@ script or via the "-pk intel" :doc:`command-line switch `. For the KOKKOS package, the option defaults for GPUs are neigh = full, neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default -value, comm = device, gpu/aware = on. When LAMMPS can safely detect -that GPU-aware MPI is not available, the default value of gpu/aware -becomes "off". For CPUs or Xeon Phis, the option defaults are neigh = -half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. The -option neigh/thread = on when there are 16K atoms or less on an MPI -rank, otherwise it is "off". These settings are made automatically by -the required "-k on" :doc:`command-line switch `. You can -change them by using the package kokkos command in your input script or -via the :doc:`-pk kokkos command-line switch `. +value, comm = device, neigh/transpose = off, gpu/aware = on. When +LAMMPS can safely detect that GPU-aware MPI is not available, the +default value of gpu/aware becomes "off". For CPUs or Xeon Phis, the +option defaults are neigh = half, neigh/qeq = half, newton = on, +binsize = 0.0, and comm = no. The option neigh/thread = on when there +are 16K atoms or less on an MPI rank, otherwise it is "off". These +settings are made automatically by the required "-k on" +:doc:`command-line switch `. You can change them by using +the package kokkos command in your input script or via the :doc:`-pk +kokkos command-line switch `. For the OMP package, the default is Nthreads = 0 and the option defaults are neigh = yes. These settings are made automatically if diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 6da318c2d5..f56ab45914 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -316,6 +316,7 @@ action sna_kokkos.h sna.h action sna_kokkos_impl.h sna.cpp action third_order_kokkos.cpp dynamical_matrix.cpp action third_order_kokkos.h dynamical_matrix.h +action transpose_helper_kokkos.h action verlet_kokkos.cpp action verlet_kokkos.h diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index dc8a1173f3..382808d523 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -232,6 +232,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) #endif neigh_thread = 0; neigh_thread_set = 0; + neigh_transpose = 0; if (ngpus > 0) { neighflag = FULL; neighflag_qeq = FULL; @@ -481,6 +482,10 @@ void KokkosLMP::accelerator(int narg, char **arg) neigh_thread = utils::logical(FLERR,arg[iarg+1],false,lmp); neigh_thread_set = 1; iarg += 2; + } else if (strcmp(arg[iarg],"neigh/transpose") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); + neigh_transpose = utils::logical(FLERR,arg[iarg+1],false,lmp); + iarg += 2; } else error->all(FLERR,"Illegal package kokkos command"); } diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index be4990d0a4..669a773734 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -46,6 +46,7 @@ class KokkosLMP : protected Pointers { int gpu_aware_flag; int neigh_thread; int neigh_thread_set; + int neigh_transpose; int newtonflag; double binsize; diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 09a78b17df..22620f0d3d 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -887,6 +887,13 @@ typedef tdual_neighbors_2d::t_dev_um t_neighbors_2d_um; typedef tdual_neighbors_2d::t_dev_const_um t_neighbors_2d_const_um; typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread; +typedef Kokkos::DualView tdual_neighbors_2d_lr; +typedef tdual_neighbors_2d_lr::t_dev t_neighbors_2d_lr; +typedef tdual_neighbors_2d_lr::t_dev_const t_neighbors_2d_const_lr; +typedef tdual_neighbors_2d_lr::t_dev_um t_neighbors_2d_um_lr; +typedef tdual_neighbors_2d_lr::t_dev_const_um t_neighbors_2d_const_um_lr; +typedef tdual_neighbors_2d_lr::t_dev_const_randomread t_neighbors_2d_randomread_lr; + }; #ifdef LMP_KOKKOS_GPU @@ -1156,6 +1163,13 @@ typedef tdual_neighbors_2d::t_host_um t_neighbors_2d_um; typedef tdual_neighbors_2d::t_host_const_um t_neighbors_2d_const_um; typedef tdual_neighbors_2d::t_host_const_randomread t_neighbors_2d_randomread; +typedef Kokkos::DualView tdual_neighbors_2d_lr; +typedef tdual_neighbors_2d_lr::t_host t_neighbors_2d_lr; +typedef tdual_neighbors_2d_lr::t_host_const t_neighbors_2d_const_lr; +typedef tdual_neighbors_2d_lr::t_host_um t_neighbors_2d_um_lr; +typedef tdual_neighbors_2d_lr::t_host_const_um t_neighbors_2d_const_um_lr; +typedef tdual_neighbors_2d_lr::t_host_const_randomread t_neighbors_2d_randomread_lr; + }; #endif //default LAMMPS Types diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp index b8eeb38ffd..2337b1ef3d 100644 --- a/src/KOKKOS/neigh_list_kokkos.cpp +++ b/src/KOKKOS/neigh_list_kokkos.cpp @@ -13,6 +13,7 @@ ------------------------------------------------------------------------- */ #include "neigh_list_kokkos.h" +#include "kokkos.h" using namespace LAMMPS_NS; @@ -44,6 +45,11 @@ void NeighListKokkos::grow(int nmax) d_numneigh = typename ArrayTypes::t_int_1d("neighlist:numneigh",maxatoms); d_neighbors = typename ArrayTypes::t_neighbors_2d(); d_neighbors = typename ArrayTypes::t_neighbors_2d(Kokkos::NoInit("neighlist:neighbors"),maxatoms,maxneighs); + + if (lmp->kokkos->neigh_transpose) { + d_neighbors_transpose = typename ArrayTypes::t_neighbors_2d_lr(); + d_neighbors_transpose = typename ArrayTypes::t_neighbors_2d_lr(Kokkos::NoInit("neighlist:neighbors"),maxatoms,maxneighs); + } } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/neigh_list_kokkos.h b/src/KOKKOS/neigh_list_kokkos.h index 04e5e7e17f..d06a3e76cf 100644 --- a/src/KOKKOS/neigh_list_kokkos.h +++ b/src/KOKKOS/neigh_list_kokkos.h @@ -70,6 +70,7 @@ public: void grow(int nmax); typename ArrayTypes::t_neighbors_2d d_neighbors; + typename ArrayTypes::t_neighbors_2d_lr d_neighbors_transpose; DAT::tdual_int_1d k_ilist; // local indices of I atoms typename ArrayTypes::t_int_1d d_ilist; typename ArrayTypes::t_int_1d d_numneigh; @@ -82,6 +83,12 @@ public: &d_neighbors(i,1)-&d_neighbors(i,0)); } + KOKKOS_INLINE_FUNCTION + AtomNeighbors get_neighbors_transpose(const int &i) const { + return AtomNeighbors(&d_neighbors_transpose(i,0),d_numneigh(i), + &d_neighbors_transpose(i,1)-&d_neighbors_transpose(i,0)); + } + KOKKOS_INLINE_FUNCTION static AtomNeighborsConst static_neighbors_const(int i, typename ArrayTypes::t_neighbors_2d_const const& d_neighbors, diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 1825fd3abe..f42ca39404 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -21,6 +21,8 @@ #include "nbin_kokkos.h" #include "nstencil.h" #include "force.h" +#include "kokkos.h" +#include "transpose_helper_kokkos.h" namespace LAMMPS_NS { @@ -158,7 +160,7 @@ void NPairKokkos::build(NeighList *list_) mbins,nstencil, k_stencil.view(), k_stencilxyz.view(), - nlocal, + nlocal,nall,lmp->kokkos->neigh_transpose, atomKK->k_x.view(), atomKK->k_radius.view(), atomKK->k_type.view(), @@ -229,18 +231,33 @@ void NPairKokkos::build(NeighList *list_) #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) #define BINS_PER_BLOCK 2 - const int factor = atoms_per_bin<64?2:1; + const int factor = atoms_per_bin <64?2:1; #else const int factor = 1; #endif if (GHOST) { - NPairKokkosBuildFunctorGhost f(data); + NPairKokkosBuildFunctorGhost f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); +#ifdef LMP_KOKKOS_GPU + if (ExecutionSpaceFromDevice::space == Device) { + int team_size = atoms_per_bin*factor; + int team_size_max = Kokkos::TeamPolicy(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag()); + if (team_size <= team_size_max) { + Kokkos::TeamPolicy config((mbins+factor-1)/factor,team_size); + Kokkos::parallel_for(config, f); + } else { // fall back to flat method + f.sharedsize = 0; + Kokkos::parallel_for(nall, f); + } + } else + Kokkos::parallel_for(nall, f); +#else Kokkos::parallel_for(nall, f); +#endif } else { if (newton_pair) { if (SIZE) { - NPairKokkosBuildFunctorSize f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + NPairKokkosBuildFunctorSize f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor); #ifdef LMP_KOKKOS_GPU if (ExecutionSpaceFromDevice::space == Device) { int team_size = atoms_per_bin*factor; @@ -326,6 +343,13 @@ void NPairKokkos::build(NeighList *list_) list->d_neighbors = typename AT::t_neighbors_2d(Kokkos::NoInit("neighlist:neighbors"), maxatoms, list->maxneighs); data.neigh_list.d_neighbors = list->d_neighbors; data.neigh_list.maxneighs = list->maxneighs; + + if (lmp->kokkos->neigh_transpose) { + data.neigh_list.d_neighbors_transpose = typename AT::t_neighbors_2d_lr(); + list->d_neighbors_transpose = typename AT::t_neighbors_2d_lr(); + list->d_neighbors_transpose = typename AT::t_neighbors_2d_lr(Kokkos::NoInit("neighlist:neighbors"), maxatoms, list->maxneighs); + data.neigh_list.d_neighbors_transpose = list->d_neighbors_transpose; + } } } @@ -338,6 +362,10 @@ void NPairKokkos::build(NeighList *list_) } list->k_ilist.template modify(); + + if (lmp->kokkos->neigh_transpose) + TransposeHelperKokkos(list->d_neighbors, list->d_neighbors_transpose); } /* ---------------------------------------------------------------------- */ @@ -416,7 +444,8 @@ void NeighborKokkosExecute:: else moltemplate = 0; // get subview of neighbors of i - const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); + const AtomNeighbors neighbors_i = neigh_transpose ? + neigh_list.get_neighbors_transpose(i) : neigh_list.get_neighbors(i); const X_FLOAT xtmp = x(i, 0); const X_FLOAT ytmp = x(i, 1); const X_FLOAT ztmp = x(i, 2); @@ -444,7 +473,7 @@ void NeighborKokkosExecute:: const X_FLOAT delx = xtmp - x(j, 0); const X_FLOAT dely = ytmp - x(j, 1); const X_FLOAT delz = ztmp - x(j, 2); - const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; if (rsq <= cutneighsq(itype,jtype)) { if (molecular != Atom::ATOMIC) { if (!moltemplate) @@ -455,18 +484,18 @@ void NeighborKokkosExecute:: /* tag[j]-tagprev); */ /* else which = 0; */ if (which == 0) { - if (n 0) { - if (n:: const X_FLOAT delx = xtmp - x(j, 0); const X_FLOAT dely = ytmp - x(j, 1); const X_FLOAT delz = ztmp - x(j, 2); - const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; if (rsq <= cutneighsq(itype,jtype)) { if (molecular != Atom::ATOMIC) { @@ -512,18 +541,18 @@ void NeighborKokkosExecute:: /* tag[j]-tagprev); */ /* else which = 0; */ if (which == 0) { - if (n 0) { - if (n::build_ItemGPU(typename Kokkos::TeamPolic /* loop over atoms in i's bin, */ const int atoms_per_bin = c_bins.extent(1); - const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin; + const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin <1?1:dev.team_size()/atoms_per_bin; const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size(); const int MY_BIN = dev.team_rank()/atoms_per_bin; const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; if (ibin >= mbins) return; - X_FLOAT* other_x = sharedmem; - other_x = other_x + 5*atoms_per_bin*MY_BIN; + X_FLOAT* other_x = sharedmem + 5*atoms_per_bin*MY_BIN; int* other_id = (int*) &other_x[4 * atoms_per_bin]; int bincount_current = c_bincount[ibin]; for (int kk = 0; kk < TEAMS_PER_BIN; kk++) { const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size(); - const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1; - /* if necessary, goto next page and add pages */ + const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1; + /* if necessary, goto next page and add pages */ - int n = 0; + int n = 0; - X_FLOAT xtmp; - X_FLOAT ytmp; - X_FLOAT ztmp; - int itype; - const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i= 0 && i < nlocal) ? i : 0; + const AtomNeighbors neighbors_i = neigh_transpose ? + neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index); + + if (i >= 0) { + xtmp = x(i, 0); + ytmp = x(i, 1); + ztmp = x(i, 2); + itype = type(i); + other_x[MY_II] = xtmp; + other_x[MY_II + atoms_per_bin] = ytmp; + other_x[MY_II + 2 * atoms_per_bin] = ztmp; + other_x[MY_II + 3 * atoms_per_bin] = itype; + } + other_id[MY_II] = i; - if (i >= 0) { - xtmp = x(i, 0); - ytmp = x(i, 1); - ztmp = x(i, 2); - itype = type(i); - other_x[MY_II] = xtmp; - other_x[MY_II + atoms_per_bin] = ytmp; - other_x[MY_II + 2 * atoms_per_bin] = ztmp; - other_x[MY_II + 3 * atoms_per_bin] = itype; - } - other_id[MY_II] = i; #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) - int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0); - if (test) return; + int test = (__syncthreads_count(i >= 0 && i < nlocal) == 0); + if (test) return; #elif defined(KOKKOS_ENABLE_SYCL) - int not_done = (i >= 0 && i <= nlocal); - dev.team_reduce(Kokkos::Max(not_done)); - if(not_done == 0) return; + int not_done = (i >= 0 && i < nlocal); + dev.team_reduce(Kokkos::Max(not_done)); + if(not_done == 0) return; #elif defined(KOKKOS_ENABLE_OPENMPTARGET) - dev.team_barrier(); + dev.team_barrier(); #endif - if (i >= 0 && i < nlocal) { - #pragma unroll 4 - for (int m = 0; m < bincount_current; m++) { - int j = other_id[m]; - const int jtype = other_x[m + 3 * atoms_per_bin]; - - //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists - if ((j == i) || - (HalfNeigh && !Newton && (j < i)) || - (HalfNeigh && Newton && - ((j < i) || - ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || - (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) - ) continue; - if (Tri) { - if (x(j,2) < ztmp) continue; - if (x(j,2) == ztmp) { - if (x(j,1) < ytmp) continue; - if (x(j,1) == ytmp) { - if (x(j,0) < xtmp) continue; - if (x(j,0) == xtmp && j <= i) continue; - } - } - } - if (exclude && exclusion(i,j,itype,jtype)) continue; - const X_FLOAT delx = xtmp - other_x[m]; - const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; - const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; - const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; - - if (rsq <= cutneighsq(itype,jtype)) { - if (molecular != Atom::ATOMIC) { - int which = 0; - if (!moltemplate) - which = NeighborKokkosExecute::find_special(i,j); - /* else if (imol >= 0) */ - /* which = find_special(onemols[imol]->special[iatom], */ - /* onemols[imol]->nspecial[iatom], */ - /* tag[j]-tagprev); */ - /* else which = 0; */ - if (which == 0) { - if (n 0) { - if (n::t_int_1d_const_um stencil - = d_stencil; - for (int k = 0; k < nstencil; k++) { - const int jbin = ibin + stencil[k]; - - if (ibin == jbin) continue; - - bincount_current = c_bincount[jbin]; - int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1; - - if (j >= 0) { - other_x[MY_II] = x(j, 0); - other_x[MY_II + atoms_per_bin] = x(j, 1); - other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); - other_x[MY_II + 3 * atoms_per_bin] = type(j); - } - - other_id[MY_II] = j; - - dev.team_barrier(); - if (i >= 0 && i < nlocal) { - #pragma unroll 8 + #pragma unroll 4 for (int m = 0; m < bincount_current; m++) { - const int j = other_id[m]; + int j = other_id[m]; const int jtype = other_x[m + 3 * atoms_per_bin]; - //if(HalfNeigh && (j < i)) continue; - if (HalfNeigh && !Newton && (j < i)) continue; - if (!HalfNeigh && j==i) continue; - if (Tri) { - if (x(j,2) < ztmp) continue; - if (x(j,2) == ztmp) { - if (x(j,1) < ytmp) continue; - if (x(j,1) == ytmp) { - if (x(j,0) < xtmp) continue; - if (x(j,0) == xtmp && j <= i) continue; + //for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists + if ((j == i) || + (HalfNeigh && !Newton && (j < i)) || + (HalfNeigh && Newton && + ((j < i) || + ((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) || + (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp))))) + ) continue; + if (Tri) { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp) { + if (x(j,1) < ytmp) continue; + if (x(j,1) == ytmp) { + if (x(j,0) < xtmp) continue; + if (x(j,0) == xtmp && j <= i) continue; + } } } - } if (exclude && exclusion(i,j,itype,jtype)) continue; - const X_FLOAT delx = xtmp - other_x[m]; const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; - const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; if (rsq <= cutneighsq(itype,jtype)) { if (molecular != Atom::ATOMIC) { @@ -739,18 +692,18 @@ void NeighborKokkosExecute::build_ItemGPU(typename Kokkos::TeamPolic /* tag[j]-tagprev); */ /* else which = 0; */ if (which == 0) { - if (n 0) { - if (n::build_ItemGPU(typename Kokkos::TeamPolic } } dev.team_barrier(); - } - if (i >= 0 && i < nlocal) { - neigh_list.d_numneigh(i) = n; - neigh_list.d_ilist(i) = i; - } + const typename ArrayTypes::t_int_1d_const_um stencil + = d_stencil; + for (int k = 0; k < nstencil; k++) { + const int jbin = ibin + stencil[k]; - if (n > neigh_list.maxneighs) { - resize() = 1; + if (ibin == jbin) continue; - if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop - } + bincount_current = c_bincount[jbin]; + int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1; + + if (j >= 0) { + other_x[MY_II] = x(j, 0); + other_x[MY_II + atoms_per_bin] = x(j, 1); + other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); + other_x[MY_II + 3 * atoms_per_bin] = type(j); + } + + other_id[MY_II] = j; + + dev.team_barrier(); + + if (i >= 0 && i < nlocal) { + #pragma unroll 8 + for (int m = 0; m < bincount_current; m++) { + const int j = other_id[m]; + const int jtype = other_x[m + 3 * atoms_per_bin]; + + //if(HalfNeigh && (j < i)) continue; + if (HalfNeigh && !Newton && (j < i)) continue; + if (!HalfNeigh && j==i) continue; + if (Tri) { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp) { + if (x(j,1) < ytmp) continue; + if (x(j,1) == ytmp) { + if (x(j,0) < xtmp) continue; + if (x(j,0) == xtmp && j <= i) continue; + } + } + } + if (exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - other_x[m]; + const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; + const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq(itype,jtype)) { + if (molecular != Atom::ATOMIC) { + int which = 0; + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = j; + else n++; + } else if (minimum_image_check(delx,dely,delz)) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = j; + else n++; + } + else if (which > 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS); + else n++; + } + } else { + if (n < neigh_list.maxneighs) neighbors_i(n++) = j; + else n++; + } + } + + } + } + dev.team_barrier(); + } + + if (i >= 0 && i < nlocal) { + neigh_list.d_numneigh(i) = n; + neigh_list.d_ilist(i) = i; + } + + if (n > neigh_list.maxneighs) { + resize() = 1; + + if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop + } } } #endif @@ -779,7 +810,7 @@ void NeighborKokkosExecute::build_ItemGPU(typename Kokkos::TeamPolic template template KOKKOS_FUNCTION void NeighborKokkosExecute:: - build_Item_Ghost(const int &i) const + build_ItemGhost(const int &i) const { /* if necessary, goto next page and add pages */ int n = 0; @@ -789,7 +820,8 @@ void NeighborKokkosExecute:: else moltemplate = 0; // get subview of neighbors of i - const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); + const AtomNeighbors neighbors_i = neigh_transpose ? + neigh_list.get_neighbors_transpose(i) : neigh_list.get_neighbors(i); const X_FLOAT xtmp = x(i, 0); const X_FLOAT ytmp = x(i, 1); const X_FLOAT ztmp = x(i, 2); @@ -833,24 +865,23 @@ void NeighborKokkosExecute:: /* tag[j]-tagprev); */ /* else which = 0; */ if (which == 0) { - if (n 0) { - if (n:: const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; if (rsq <= cutneighsq(itype,jtype)) { - if (n:: /* ---------------------------------------------------------------------- */ +#ifdef LMP_KOKKOS_GPU +template template +LAMMPS_DEVICE_FUNCTION inline +void NeighborKokkosExecute::build_ItemGhostGPU(typename Kokkos::TeamPolicy::member_type dev, + size_t sharedsize) const +{ + auto* sharedmem = static_cast(dev.team_shmem().get_shmem(sharedsize)); + // loop over atoms in i's bin + + const int atoms_per_bin = c_bins.extent(1); + const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin <1?1:dev.team_size()/atoms_per_bin; + const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size(); + const int MY_BIN = dev.team_rank()/atoms_per_bin; + + const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; + + if (ibin >= mbins) return; + + X_FLOAT* other_x = sharedmem + 5*atoms_per_bin*MY_BIN; + int* other_id = (int*) &other_x[4 * atoms_per_bin]; + + int bincount_current = c_bincount[ibin]; + + for (int kk = 0; kk < TEAMS_PER_BIN; kk++) { + const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size(); + const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1; + + int n = 0; + + X_FLOAT xtmp; + X_FLOAT ytmp; + X_FLOAT ztmp; + int itype; + const int index = (i >= 0 && i < nall) ? i : 0; + const AtomNeighbors neighbors_i = neigh_transpose ? + neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index); + + if (i >= 0) { + xtmp = x(i, 0); + ytmp = x(i, 1); + ztmp = x(i, 2); + itype = type(i); + other_x[MY_II] = xtmp; + other_x[MY_II + atoms_per_bin] = ytmp; + other_x[MY_II + 2 * atoms_per_bin] = ztmp; + other_x[MY_II + 3 * atoms_per_bin] = itype; + } + other_id[MY_II] = i; +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) + int test = (__syncthreads_count(i >= 0 && i < nall) == 0); + if (test) return; +#elif defined(KOKKOS_ENABLE_SYCL) + int not_done = (i >= 0 && i < nall); + dev.team_reduce(Kokkos::Max(not_done)); + if (not_done == 0) return; +#elif defined(KOKKOS_ENABLE_OPENMPTARGET) + dev.team_barrier(); +#endif + + int which = 0; + int moltemplate; + if (molecular == Atom::TEMPLATE) moltemplate = 1; + else moltemplate = 0; + + const typename ArrayTypes::t_int_1d_const_um stencil + = d_stencil; + const typename ArrayTypes::t_int_1d_3_const_um stencilxyz + = d_stencilxyz; + + // loop over all atoms in surrounding bins in stencil including self + // when i is a ghost atom, must check if stencil bin is out of bounds + // skip i = j + // no molecular test when i = ghost atom + + int ghost = (i >= nlocal && i < nall); + int binxyz[3]; + if (ghost) + coord2bin(xtmp, ytmp, ztmp, binxyz); + const int xbin = binxyz[0]; + const int ybin = binxyz[1]; + const int zbin = binxyz[2]; + for (int k = 0; k < nstencil; k++) { + int active = 1; + if (ghost) { + const int xbin2 = xbin + stencilxyz(k,0); + const int ybin2 = ybin + stencilxyz(k,1); + const int zbin2 = zbin + stencilxyz(k,2); + if (xbin2 < 0 || xbin2 >= mbinx || + ybin2 < 0 || ybin2 >= mbiny || + zbin2 < 0 || zbin2 >= mbinz) active = 0; + } + + const int jbin = ibin + stencil[k]; + bincount_current = c_bincount[jbin]; + int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1; + + if (j >= 0) { + other_x[MY_II] = x(j, 0); + other_x[MY_II + atoms_per_bin] = x(j, 1); + other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); + other_x[MY_II + 3 * atoms_per_bin] = type(j); + } + + other_id[MY_II] = j; + + dev.team_barrier(); + + if (active && i >= 0 && i < nall) { + #pragma unroll 4 + for (int m = 0; m < bincount_current; m++) { + const int j = other_id[m]; + + if (HalfNeigh && j <= i) continue; + else if (j == i) continue; + + const int jtype = other_x[m + 3 * atoms_per_bin]; + if (exclude && exclusion(i,j,itype,jtype)) continue; + + const X_FLOAT delx = xtmp - other_x[m]; + const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; + const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq(itype,jtype)) { + if (molecular != Atom::ATOMIC && !ghost) { + if (!moltemplate) + which = NeighborKokkosExecute::find_special(i,j); + /* else if (imol >= 0) */ + /* which = find_special(onemols[imol]->special[iatom], */ + /* onemols[imol]->nspecial[iatom], */ + /* tag[j]-tagprev); */ + /* else which = 0; */ + if (which == 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = j; + else n++; + } else if (minimum_image_check(delx,dely,delz)) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = j; + else n++; + } + else if (which > 0) { + if (n < neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS); + else n++; + } + } else { + if (n < neigh_list.maxneighs) neighbors_i(n++) = j; + else n++; + } + } + } + } + dev.team_barrier(); + } + + if (i >= 0 && i < nall) { + neigh_list.d_numneigh(i) = n; + neigh_list.d_ilist(i) = i; + } + + if (n > neigh_list.maxneighs) { + resize() = 1; + + if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop + } + } +} +#endif + +/* ---------------------------------------------------------------------- */ + template template KOKKOS_FUNCTION void NeighborKokkosExecute:: @@ -909,7 +1109,8 @@ void NeighborKokkosExecute:: // get subview of neighbors of i - const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i); + const AtomNeighbors neighbors_i = neigh_transpose ? + neigh_list.get_neighbors_transpose(i) : neigh_list.get_neighbors(i); const X_FLOAT xtmp = x(i, 0); const X_FLOAT ytmp = x(i, 1); const X_FLOAT ztmp = x(i, 2); @@ -940,12 +1141,12 @@ void NeighborKokkosExecute:: const X_FLOAT delx = xtmp - x(j, 0); const X_FLOAT dely = ytmp - x(j, 1); const X_FLOAT delz = ztmp - x(j, 2); - const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; const X_FLOAT radsum = radi + radius(j); const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); if (rsq <= cutsq) { - if (n:: const X_FLOAT delx = xtmp - x(j, 0); const X_FLOAT dely = ytmp - x(j, 1); const X_FLOAT delz = ztmp - x(j, 2); - const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; const X_FLOAT radsum = radi + radius(j); const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); if (rsq <= cutsq) { - if (n::build_ItemSizeGPU(typename Kokkos::TeamP /* loop over atoms in i's bin, */ const int atoms_per_bin = c_bins.extent(1); - const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin<1?1:dev.team_size()/atoms_per_bin; + const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin <1?1:dev.team_size()/atoms_per_bin; const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size(); const int MY_BIN = dev.team_rank()/atoms_per_bin; const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; if (ibin >= mbins) return; - X_FLOAT* other_x = sharedmem; - other_x = other_x + 6*atoms_per_bin*MY_BIN; + X_FLOAT* other_x = sharedmem + 6*atoms_per_bin*MY_BIN; int* other_id = (int*) &other_x[5 * atoms_per_bin]; int bincount_current = c_bincount[ibin]; @@ -1044,7 +1244,9 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP X_FLOAT ztmp; X_FLOAT radi; int itype; - const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i= 0 && i < nlocal) ? i : 0; + const AtomNeighbors neighbors_i = neigh_transpose ? + neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index); const int mask_history = 3 << SBBITS; if (i >= 0) { @@ -1061,12 +1263,12 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP } other_id[MY_II] = i; #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) - int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0); + int test = (__syncthreads_count(i >= 0 && i < nlocal) == 0); if (test) return; #elif defined(KOKKOS_ENABLE_SYCL) - int not_done = (i >= 0 && i <= nlocal); + int not_done = (i >= 0 && i < nlocal); dev.team_reduce(Kokkos::Max(not_done)); - if(not_done == 0) return; + if (not_done == 0) return; #elif defined(KOKKOS_ENABLE_OPENMPTARGET) dev.team_barrier(); #endif @@ -1099,12 +1301,12 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP const X_FLOAT delx = xtmp - other_x[m]; const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; - const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin]; const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); if (rsq <= cutsq) { - if (n::build_ItemSizeGPU(typename Kokkos::TeamP const X_FLOAT delx = xtmp - other_x[m]; const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; - const X_FLOAT rsq = delx * delx + dely * dely + delz * delz; + const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin]; const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); if (rsq <= cutsq) { - if (n KOKKOS_FUNCTION - void build_Item_Ghost(const int &i) const; + void build_ItemGhost(const int &i) const; template KOKKOS_FUNCTION @@ -316,6 +316,11 @@ class NeighborKokkosExecute void build_ItemGPU(typename Kokkos::TeamPolicy::member_type dev, size_t sharedsize) const; + template + LAMMPS_DEVICE_FUNCTION inline + void build_ItemGhostGPU(typename Kokkos::TeamPolicy::member_type dev, + size_t sharedsize) const; + template LAMMPS_DEVICE_FUNCTION inline void build_ItemSizeGPU(typename Kokkos::TeamPolicy::member_type dev, @@ -422,13 +427,43 @@ struct NPairKokkosBuildFunctorGhost { typedef DeviceType device_type; const NeighborKokkosExecute c; + size_t sharedsize; - NPairKokkosBuildFunctorGhost(const NeighborKokkosExecute &_c):c(_c) {} + NPairKokkosBuildFunctorGhost(const NeighborKokkosExecute &_c, + size_t _sharedsize):c(_c), + sharedsize(_sharedsize) {} KOKKOS_INLINE_FUNCTION void operator() (const int & i) const { - c.template build_Item_Ghost(i); + c.template build_ItemGhost(i); } + +#ifdef LMP_KOKKOS_GPU + LAMMPS_DEVICE_FUNCTION inline + void operator() (typename Kokkos::TeamPolicy::member_type dev) const { + c.template build_ItemGhostGPU(dev, sharedsize); + } + size_t team_shmem_size(const int team_size) const { (void) team_size; return sharedsize; } +#endif +}; + +template +struct NPairKokkosBuildFunctorGhost { + typedef LMPHostType device_type; + + const NeighborKokkosExecute c; + size_t sharedsize; + + NPairKokkosBuildFunctorGhost(const NeighborKokkosExecute &_c, + size_t _sharedsize):c(_c), + sharedsize(_sharedsize) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int & i) const { + c.template build_ItemGhost(i); + } + + void operator() (typename Kokkos::TeamPolicy::member_type /*dev*/) const {} // Should error out }; template diff --git a/src/KOKKOS/transpose_helper_kokkos.h b/src/KOKKOS/transpose_helper_kokkos.h new file mode 100644 index 0000000000..464bf04c4c --- /dev/null +++ b/src/KOKKOS/transpose_helper_kokkos.h @@ -0,0 +1,207 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Evan Weinberg (NVIDIA) +------------------------------------------------------------------------- */ + +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +// This helper class implements optimized out-of-place transposes for Rank-2 views. +// In the case where both views have the same layout, it uses Kokkos' default `deep_copy`. +// In the case where the views have different layouts (LayoutLeft/LayoutRight), it implements +// a transpose through scratch memory staging +template +struct TransposeHelperKokkos { + + using DeviceType = DeviceType_; + + using t_view_dst = t_view_dst_; + using t_view_src = t_view_src_; + + static_assert(std::is_same::value, "Value types do not match"); + static_assert(t_view_dst::Rank == 2, "Destination view rank != 2"); + static_assert(t_view_src::Rank == 2, "Source view rank != 2"); + + using dst_layout = typename t_view_dst::traits::array_layout; + using src_layout = typename t_view_src::traits::array_layout; + + typedef ArrayTypes AT; + + using t_view_value = typename t_view_dst::value_type; + + + // set tile size, will update so each thread does multiple loads +#ifdef KOKKOS_ENABLE_HIP + static constexpr int tile_size = 16; + static constexpr int threads_per_team = 8; +#else + static constexpr int tile_size = 32; + static constexpr int threads_per_team = 4; +#endif + static constexpr int bank_pad = 1; + static constexpr int elem_size = sizeof(t_view_value); + + + t_view_dst d_dst; + t_view_src d_src; + + bool src_is_layout_right; + + // extents divided by vector length, rounded up + int extent_tiles[2]; + + // 1 if extent is divisible by vector length, 0 otherwise + int extent_is_multiple_tile_size[2]; + + // number of teams + int n_teams; + + // amount of shared memory per thread + int shared_mem_per_thread; + + TransposeHelperKokkos(t_view_dst d_dst_, t_view_src d_src_) + : d_dst(d_dst_), d_src(d_src_) { + + assert(d_dst.extent(0) == d_src.extent(0) && d_dst.extent(1) == d_dst.extent(1)); + + if (std::is_same::value) { + Kokkos::deep_copy(d_dst, d_src); + } else { + + src_is_layout_right = std::is_same::value; + + extent_tiles[0] = (d_dst.extent(0) + tile_size - 1) / tile_size; + extent_tiles[1] = (d_dst.extent(1) + tile_size - 1) / tile_size; + + extent_is_multiple_tile_size[0] = (extent_tiles[0] * tile_size == d_dst.extent(0)) ? 1 : 0; + extent_is_multiple_tile_size[1] = (extent_tiles[1] * tile_size == d_dst.extent(1)) ? 1 : 0; + + n_teams = (extent_tiles[0] * extent_tiles[1] + threads_per_team - 1) / threads_per_team; + + shared_mem_per_thread = tile_size * (tile_size + bank_pad) * elem_size; + + Kokkos::TeamPolicy transpose_policy(n_teams, threads_per_team, tile_size); + transpose_policy = transpose_policy.set_scratch_size(0, Kokkos::PerThread(shared_mem_per_thread)); + + Kokkos::parallel_for(transpose_policy, *this); + } + } + + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy::member_type& team_member) const { + + t_view_value* buffer = (t_view_value*)(team_member.team_shmem().get_shmem(shared_mem_per_thread * threads_per_team, 0)) + (shared_mem_per_thread / elem_size) * team_member.team_rank(); + + // extract flattened tile + const int flattened_idx = team_member.team_rank() + team_member.league_rank() * threads_per_team; + + // get range x, range y tile + int extent_tile_id[2]; + if (src_is_layout_right) { + // keep extent 1 tiles close together b/c loading from layout right + extent_tile_id[0] = flattened_idx / extent_tiles[1]; + extent_tile_id[1] = flattened_idx - extent_tile_id[0] * extent_tiles[1]; + } else { + // keep extent 0 tiles close together b/c loading from layout left + extent_tile_id[1] = flattened_idx / extent_tiles[0]; + extent_tile_id[0] = flattened_idx - extent_tile_id[1] * extent_tiles[0]; + } + + int elem[2]; + elem[0] = extent_tile_id[0] * tile_size; + elem[1] = extent_tile_id[1] * tile_size; + + if (elem[0] >= d_dst.extent(0) || + elem[1] >= d_dst.extent(1)) return; + + // determine if a row/column is a full `tile_size` in size or not + bool perfect_pad[2]; + perfect_pad[0] = (extent_is_multiple_tile_size[0] == 1 || extent_tile_id[0] + 1 < extent_tiles[0]); + perfect_pad[1] = (extent_is_multiple_tile_size[1] == 1 || extent_tile_id[1] + 1 < extent_tiles[1]); + + // load phase + if (src_is_layout_right) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), + [&] (const int j) { + + if (elem[1] + j < d_src.extent(1)) { + if (perfect_pad[0]) { + for (int i = 0; i < tile_size; i++) + buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j); + } else { + for (int i = 0; i < (d_src.extent(0) - elem[0]); i++) + buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j); + } + } + }); + + } else { + // src is layout left + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), + [&] (const int i) { + + if (elem[0] + i < d_src.extent(0)) { + if (perfect_pad[1]) { + for (int j = 0; j < tile_size; j++) + buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j); + } else { + for (int j = 0; j < (d_src.extent(1) - elem[1]); j++) + buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j); + } + } + }); + } + + // No need for an extra sync b/c there is an implicit sync at the end + // of a ThreadVectorRange as per the Kokkos programming model + + // save phase + if (src_is_layout_right) { + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), + [&] (const int i) { + + if (elem[0] + i < d_dst.extent(0)) { + if (perfect_pad[1]) { + for (int j = 0; j < tile_size; j++) + d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j]; + } else { + for (int j = 0; j < (d_dst.extent(1) - elem[1]); j++) + d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j]; + } + } + }); + } else { + + // src is layout left + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), + [&] (const int j) { + + if (elem[1] + j < d_dst.extent(1)) { + if (perfect_pad[0]) { + for (int i = 0; i < tile_size; i++) + d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j]; + } else { + for (int i = 0; i < (d_dst.extent(0) - elem[0]); i++) + d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j]; + } + } + }); + } + + } +}; + +}