diff --git a/src/accelerator_cuda.h b/src/accelerator_cuda.h index 1f4f863533..fb99ce2004 100644 --- a/src/accelerator_cuda.h +++ b/src/accelerator_cuda.h @@ -32,6 +32,8 @@ // needed for compiling when USER-CUDA is not installed #include "comm.h" +#include "domain.h" +#include "neighbor.h" #include "modify.h" #include "verlet.h" diff --git a/src/accelerator_kokkos.h b/src/accelerator_kokkos.h index 2b2ba1a957..e6b2a21e84 100644 --- a/src/accelerator_kokkos.h +++ b/src/accelerator_kokkos.h @@ -22,6 +22,8 @@ #include "kokkos.h" #include "atom_kokkos.h" #include "comm_kokkos.h" +#include "domain_kokkos.h" +#include "neighbor_kokkos.h" #include "modify_kokkos.h" #else @@ -29,6 +31,12 @@ // dummy interface to KOKKOS // needed for compiling when KOKKOS is not installed +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "neighbor.h" +#include "modify.h" + namespace LAMMPS_NS { class KokkosLMP { @@ -38,29 +46,37 @@ class KokkosLMP { KokkosLMP(class LAMMPS *, int, char **) {kokkos_exists = 0;} ~KokkosLMP() {} void accelerator(int, char **) {} + int neigh_list_kokkos(int) {return 0;} + int neigh_count(int) {return 0;} }; class AtomKokkos : public Atom { public: - AtomKokkos(class LAMMPS *lmp) : Atom(lmp) {} + AtomKokkos(class LAMMPS *lmp) : Atom(lmp) {} ~AtomKokkos() {} }; class CommKokkos : public Comm { public: - CommKokkos(class LAMMPS *lmp) : Comm(lmp) {} + CommKokkos(class LAMMPS *lmp) : Comm(lmp) {} ~CommKokkos() {} }; +class DomainKokkos : public Domain { + public: + DomainKokkos(class LAMMPS *lmp) : Domain(lmp) {} + ~DomainKokkos() {} +}; + class NeighborKokkos : public Neighbor { public: - NeighborKokkos(class LAMMPS *lmp) : Neighbor(lmp) {} + NeighborKokkos(class LAMMPS *lmp) : Neighbor(lmp) {} ~NeighborKokkos() {} }; class ModifyKokkos : public Modify { public: - ModifyKokkos(class LAMMPS *lmp) : Modify(lmp) {} + ModifyKokkos(class LAMMPS *lmp) : Modify(lmp) {} ~ModifyKokkos() {} }; diff --git a/src/atom.h b/src/atom.h index 
fa917eec76..ff396b2d44 100644 --- a/src/atom.h +++ b/src/atom.h @@ -62,8 +62,8 @@ class Atom : protected Pointers { double *eradius,*ervel,*erforce,*ervelforce; double *cs,*csforce,*vforce; int *etag; - double *rho, *drho; - double *e, *de; + double *rho,*drho; + double *e,*de; double **vest; double *cv; diff --git a/src/finish.cpp b/src/finish.cpp index c0363187f2..352c547b10 100644 --- a/src/finish.cpp +++ b/src/finish.cpp @@ -16,8 +16,9 @@ #include "string.h" #include "stdio.h" #include "finish.h" -#include "timer.h" +#include "lammps.h" #include "universe.h" +#include "accelerator_kokkos.h" #include "atom.h" #include "atom_vec.h" #include "molecule.h" @@ -29,6 +30,7 @@ #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" +#include "timer.h" #include "output.h" #include "memory.h" @@ -527,22 +529,28 @@ void Finish::end(int flag) // find a non-skip neighbor list containing half the pairwise interactions // count neighbors in that list for stats purposes + // allow it to be Kokkos neigh list as well - for (m = 0; m < neighbor->old_nrequest; m++) + for (m = 0; m < neighbor->old_nrequest; m++) { if ((neighbor->old_requests[m]->half || neighbor->old_requests[m]->gran || neighbor->old_requests[m]->respaouter || neighbor->old_requests[m]->half_from_full) && - neighbor->old_requests[m]->skip == 0 && - neighbor->lists[m]->numneigh) break; + neighbor->old_requests[m]->skip == 0) { + if (neighbor->lists[m] && neighbor->lists[m]->numneigh) break; + if (lmp->kokkos && lmp->kokkos->neigh_list_kokkos(m)) break; + } + } nneigh = 0; if (m < neighbor->old_nrequest) { - int inum = neighbor->lists[m]->inum; - int *ilist = neighbor->lists[m]->ilist; - int *numneigh = neighbor->lists[m]->numneigh; - for (i = 0; i < inum; i++) - nneigh += numneigh[ilist[i]]; + if (neighbor->lists[m]) { + int inum = neighbor->lists[m]->inum; + int *ilist = neighbor->lists[m]->ilist; + int *numneigh = neighbor->lists[m]->numneigh; + for (i = 0; i < inum; i++) + nneigh += 
numneigh[ilist[i]]; + } else if (lmp->kokkos) nneigh = lmp->kokkos->neigh_count(m); } tmp = nneigh; @@ -565,19 +573,23 @@ void Finish::end(int flag) // find a non-skip neighbor list containing full pairwise interactions // count neighbors in that list for stats purposes - for (m = 0; m < neighbor->old_nrequest; m++) + for (m = 0; m < neighbor->old_nrequest; m++) { if (neighbor->old_requests[m]->full && - neighbor->old_requests[m]->skip == 0) break; + neighbor->old_requests[m]->skip == 0) { + if (neighbor->lists[m] && neighbor->lists[m]->numneigh) break; + if (lmp->kokkos && lmp->kokkos->neigh_list_kokkos(m)) break; + } + } nneighfull = 0; if (m < neighbor->old_nrequest) { - if (neighbor->lists[m]->numneigh > 0) { + if (neighbor->lists[m]) { int inum = neighbor->lists[m]->inum; int *ilist = neighbor->lists[m]->ilist; int *numneigh = neighbor->lists[m]->numneigh; for (i = 0; i < inum; i++) nneighfull += numneigh[ilist[i]]; - } + } else if (lmp->kokkos) nneighfull = lmp->kokkos->neigh_count(m); tmp = nneighfull; stats(1,&tmp,&ave,&max,&min,10,histo); diff --git a/src/fix.cpp b/src/fix.cpp index 2a794fe8ae..a2b772400d 100644 --- a/src/fix.cpp +++ b/src/fix.cpp @@ -79,8 +79,9 @@ Fix::Fix(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) datamask = ALL_MASK; datamask_ext = ALL_MASK; - datamask_read = datamask_read_ext = ALL_MASK; - datamask_modify = datamask_modify_ext = ALL_MASK; + execution_space = Host; + datamask_read = ALL_MASK; + datamask_modify = ALL_MASK; } /* ---------------------------------------------------------------------- */ diff --git a/src/fix.h b/src/fix.h index 5b1f5e2dea..92f377ff5a 100644 --- a/src/fix.h +++ b/src/fix.h @@ -84,10 +84,10 @@ class Fix : protected Pointers { int restart_reset; // 1 if restart just re-initialized fix - // KOKKOS host/device flag and per-fix data masks + // KOKKOS host/device flag and data masks ExecutionSpace execution_space; - unsigned int datamask_read, datamask_modify; + unsigned int datamask_read,datamask_modify; 
// USER-CUDA per-fix data masks diff --git a/src/lammps.cpp b/src/lammps.cpp index b69cd7bf3c..a4daffc34a 100644 --- a/src/lammps.cpp +++ b/src/lammps.cpp @@ -553,9 +553,11 @@ void LAMMPS::create() else comm = new Comm(this); if (cuda) neighbor = new NeighborCuda(this); + else if (kokkos) neighbor = new NeighborKokkos(this); else neighbor = new Neighbor(this); if (cuda) domain = new DomainCuda(this); + else if (kokkos) domain = new DomainKokkos(this); #ifdef LMP_USER_OMP else domain = new DomainOMP(this); #else @@ -630,6 +632,7 @@ void LAMMPS::destroy() delete force; delete group; delete output; + delete modify; // modify must come after output, force, update // since they delete fixes delete domain; // domain must come after modify diff --git a/src/neigh_list.h b/src/neigh_list.h index b5b277c218..0329c14040 100644 --- a/src/neigh_list.h +++ b/src/neigh_list.h @@ -78,7 +78,7 @@ class NeighList : protected Pointers { class CudaNeighList *cuda_list; // CUDA neighbor list NeighList(class LAMMPS *); - ~NeighList(); + virtual ~NeighList(); void setup_pages(int, int, int); // setup page data structures void grow(int); // grow maxlocal void stencil_allocate(int, int); // allocate stencil arrays @@ -87,7 +87,7 @@ class NeighList : protected Pointers { int get_maxlocal() {return maxatoms;} bigint memory_usage(); - private: + protected: int maxatoms; // size of allocated atom arrays }; diff --git a/src/neigh_request.cpp b/src/neigh_request.cpp index dfcb20e423..5d0044556e 100644 --- a/src/neigh_request.cpp +++ b/src/neigh_request.cpp @@ -45,6 +45,7 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp) // default is no neighbors of ghosts // default is no CUDA neighbor list build // default is no multi-threaded neighbor list build + // default is no Kokkos neighbor list build occasional = 0; newton = 0; @@ -53,6 +54,7 @@ NeighRequest::NeighRequest(LAMMPS *lmp) : Pointers(lmp) ghost = 0; cudable = 0; omp = 0; + kokkos_host = kokkos_device = 0; // default is no copy or 
skip diff --git a/src/neigh_request.h b/src/neigh_request.h index e16c2df814..4facd9f405 100644 --- a/src/neigh_request.h +++ b/src/neigh_request.h @@ -80,6 +80,11 @@ class NeighRequest : protected Pointers { int omp; + // 1 if using Kokkos neighbor build + + int kokkos_host; + int kokkos_device; + // set by neighbor and pair_hybrid after all requests are made // these settings do not change kind value diff --git a/src/neighbor.cpp b/src/neighbor.cpp index 731278a87e..d44fc351f2 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -243,7 +243,8 @@ void Neighbor::init() n = atom->ntypes; if (cutneighsq == NULL) { - memory->create(cutneighsq,n+1,n+1,"neigh:cutneighsq"); + if (lmp->kokkos) init_cutneighsq_kokkos(n); + else memory->create(cutneighsq,n+1,n+1,"neigh:cutneighsq"); memory->create(cutneighghostsq,n+1,n+1,"neigh:cutneighghostsq"); cuttype = new double[n+1]; cuttypesq = new double[n+1]; @@ -468,16 +469,27 @@ void Neighbor::init() delete [] pair_build; delete [] stencil_create; - nlist = nrequest; - lists = new NeighList*[nlist]; - pair_build = new PairPtr[nlist]; - stencil_create = new StencilPtr[nlist]; + if (lmp->kokkos) nlist = init_lists_kokkos(); + else nlist = nrequest; + + lists = new NeighList*[nrequest]; + pair_build = new PairPtr[nrequest]; + stencil_create = new StencilPtr[nrequest]; + + // initialize to NULL since some may be Kokkos lists + + for (i = 0; i < nrequest; i++) { + lists[i] = NULL; + pair_build[i] = NULL; + stencil_create[i] = NULL; + } // create individual lists, one per request // pass list ptr back to requestor (except for Command class) // wait to allocate initial pages until copy lists are detected - for (i = 0; i < nlist; i++) { + for (i = 0; i < nrequest; i++) { + if (requests[i]->kokkos_host || requests[i]->kokkos_device) continue; lists[i] = new NeighList(lmp); lists[i]->index = i; @@ -520,7 +532,8 @@ void Neighbor::init() int processed; - for (i = 0; i < nlist; i++) { + for (i = 0; i < nrequest; i++) { + if (!lists[i]) 
continue; processed = 0; if (requests[i]->copy) { @@ -559,17 +572,20 @@ void Neighbor::init() if (processed) continue; if (requests[i]->pair && requests[i]->half) { - for (j = 0; j < nlist; j++) + for (j = 0; j < nrequest; j++) { + if (!lists[j]) continue; if (requests[j]->full && requests[j]->occasional == 0 && requests[j]->skip == 0) break; - if (j < nlist) { + } + if (j < nrequest) { requests[i]->half = 0; requests[i]->half_from_full = 1; lists[i]->listfull = lists[j]; } } else if (requests[i]->fix || requests[i]->compute) { - for (j = 0; j < nlist; j++) { + for (j = 0; j < nrequest; j++) { + if (!lists[j]) continue; if (requests[i]->half && requests[j]->pair && requests[j]->skip == 0 && requests[j]->half) break; if (requests[i]->full && requests[j]->pair && @@ -579,20 +595,21 @@ void Neighbor::init() if (requests[i]->half && requests[j]->pair && requests[j]->skip == 0 && requests[j]->respaouter) break; } - if (j < nlist && requests[j]->cudable != requests[i]->cudable) - j = nlist; - if (j < nlist) { + if (j < nrequest && requests[j]->cudable != requests[i]->cudable) + j = nrequest; + if (j < nrequest) { requests[i]->copy = 1; requests[i]->otherlist = j; lists[i]->listcopy = lists[j]; } else { - for (j = 0; j < nlist; j++) { + for (j = 0; j < nrequest; j++) { + if (!lists[j]) continue; if (requests[i]->half && requests[j]->pair && requests[j]->skip == 0 && requests[j]->full) break; } - if (j < nlist && requests[j]->cudable != requests[i]->cudable) - j = nlist; - if (j < nlist) { + if (j < nrequest && requests[j]->cudable != requests[i]->cudable) + j = nrequest; + if (j < nrequest) { requests[i]->half = 0; requests[i]->half_from_full = 1; lists[i]->listfull = lists[j]; @@ -603,15 +620,17 @@ void Neighbor::init() // allocate initial pages for each list, except if listcopy set - for (i = 0; i < nlist; i++) + for (i = 0; i < nrequest; i++) { + if (!lists[i]) continue; if (!lists[i]->listcopy) lists[i]->setup_pages(pgsize,oneatom,requests[i]->dnum); + } // set ptrs 
to pair_build and stencil_create functions for each list // ptrs set to NULL if not set explicitly // also set cudable to 0 if any neigh list request is not cudable - for (i = 0; i < nlist; i++) { + for (i = 0; i < nrequest; i++) { choose_build(i,requests[i]); if (style != NSQ) choose_stencil(i,requests[i]); else stencil_create[i] = NULL; @@ -626,32 +645,37 @@ void Neighbor::init() // anyghostlist = 1 if any non-occasional list stores neighbors of ghosts anyghostlist = 0; - for (i = 0; i < nlist; i++) { - lists[i]->buildflag = 1; - if (pair_build[i] == NULL) lists[i]->buildflag = 0; - if (requests[i]->occasional) lists[i]->buildflag = 0; + for (i = 0; i < nrequest; i++) { + if (lists[i]) { + lists[i]->buildflag = 1; + if (pair_build[i] == NULL) lists[i]->buildflag = 0; + if (requests[i]->occasional) lists[i]->buildflag = 0; - lists[i]->growflag = 1; - if (requests[i]->copy) lists[i]->growflag = 0; + lists[i]->growflag = 1; + if (requests[i]->copy) lists[i]->growflag = 0; - lists[i]->stencilflag = 1; - if (style == NSQ) lists[i]->stencilflag = 0; - if (stencil_create[i] == NULL) lists[i]->stencilflag = 0; + lists[i]->stencilflag = 1; + if (style == NSQ) lists[i]->stencilflag = 0; + if (stencil_create[i] == NULL) lists[i]->stencilflag = 0; - lists[i]->ghostflag = 0; - if (requests[i]->ghost) lists[i]->ghostflag = 1; - if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1; + lists[i]->ghostflag = 0; + if (requests[i]->ghost) lists[i]->ghostflag = 1; + if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1; + } else init_list_flags1_kokkos(i); } #ifdef NEIGH_LIST_DEBUG - for (i = 0; i < nlist; i++) lists[i]->print_attributes(); + for (i = 0; i < nrequest; i++) if (lists[i]) lists[i]->print_attributes(); #endif // allocate atom arrays for neighbor lists that store them maxatom = atom->nmax; - for (i = 0; i < nlist; i++) - if (lists[i]->growflag) lists[i]->grow(maxatom); + for (i = 0; i < nrequest; i++) { + if (lists[i]) { + if (lists[i]->growflag) 
lists[i]->grow(maxatom); + } else init_list_grow_kokkos(i); + } // setup 3 vectors of pairwise neighbor lists // blist = lists whose pair_build() is invoked every reneighbor @@ -664,16 +688,18 @@ void Neighbor::init() delete [] blist; delete [] glist; delete [] slist; - blist = new int[nlist]; - glist = new int[nlist]; - slist = new int[nlist]; + blist = new int[nrequest]; + glist = new int[nrequest]; + slist = new int[nrequest]; - for (i = 0; i < nlist; i++) { - if (lists[i]->buildflag) blist[nblist++] = i; - if (lists[i]->growflag && requests[i]->occasional == 0) - glist[nglist++] = i; - if (lists[i]->stencilflag && requests[i]->occasional == 0) - slist[nslist++] = i; + for (i = 0; i < nrequest; i++) { + if (lists[i]) { + if (lists[i]->buildflag) blist[nblist++] = i; + if (lists[i]->growflag && requests[i]->occasional == 0) + glist[nglist++] = i; + if (lists[i]->stencilflag && requests[i]->occasional == 0) + slist[nslist++] = i; + } else init_list_flags2_kokkos(i); } #ifdef NEIGH_LIST_DEBUG @@ -691,12 +717,13 @@ void Neighbor::init() while (!done) { done = 1; for (i = 0; i < nblist; i++) { + if (!lists[blist[i]]) continue; NeighList *ptr = NULL; if (lists[blist[i]]->listfull) ptr = lists[blist[i]]->listfull; if (lists[blist[i]]->listcopy) ptr = lists[blist[i]]->listcopy; if (lists[blist[i]]->listskip) ptr = lists[blist[i]]->listskip; if (ptr == NULL) continue; - for (m = 0; m < nlist; m++) + for (m = 0; m < nrequest; m++) if (ptr == lists[m]) break; for (j = 0; j < nblist; j++) if (m == blist[j]) break; @@ -1399,9 +1426,13 @@ void Neighbor::build(int topoflag) // invoke building of pair and molecular neighbor lists // only for pairwise lists with buildflag set + // blist is for standard neigh lists, otherwise is a Kokkos list - for (i = 0; i < nblist; i++) - (this->*pair_build[blist[i]])(lists[blist[i]]); + for (i = 0; i < nblist; i++) { + if (lists[blist[i]]) + (this->*pair_build[blist[i]])(lists[blist[i]]); + else build_kokkos(i); + } if (atom->molecular && 
topoflag) build_topology(); } @@ -1643,8 +1674,10 @@ void Neighbor::setup_bins() // only done for lists with stencilflag and buildflag set for (int i = 0; i < nslist; i++) { - lists[slist[i]]->stencil_allocate(smax,style); - (this->*stencil_create[slist[i]])(lists[slist[i]],sx,sy,sz); + if (lists[slist[i]]) { + lists[slist[i]]->stencil_allocate(smax,style); + (this->*stencil_create[slist[i]])(lists[slist[i]],sx,sy,sz); + } else setup_bins_kokkos(i); } } @@ -1971,7 +2004,8 @@ bigint Neighbor::memory_usage() bytes += memory->usage(binhead,maxhead); } - for (int i = 0; i < nlist; i++) bytes += lists[i]->memory_usage(); + for (int i = 0; i < nrequest; i++) + if (lists[i]) bytes += lists[i]->memory_usage(); bytes += memory->usage(bondlist,maxbond,3); bytes += memory->usage(anglelist,maxangle,4); diff --git a/src/neighbor.h b/src/neighbor.h index 8812aa5455..ec6881d56d 100644 --- a/src/neighbor.h +++ b/src/neighbor.h @@ -176,6 +176,16 @@ class Neighbor : protected Pointers { virtual void choose_build(int, class NeighRequest *); void choose_stencil(int, class NeighRequest *); + // dummy (no-op) defaults, real functions provided by NeighborKokkos + + virtual void init_cutneighsq_kokkos(int) {} + virtual int init_lists_kokkos() {return 0;} + virtual void init_list_flags1_kokkos(int) {} + virtual void init_list_flags2_kokkos(int) {} + virtual void init_list_grow_kokkos(int) {} + virtual void build_kokkos(int) {} + virtual void setup_bins_kokkos(int) {} + // pairwise build functions typedef void (Neighbor::*PairPtr)(class NeighList *); diff --git a/src/pair.cpp b/src/pair.cpp index 617f85e108..2e250b7c33 100644 --- a/src/pair.cpp +++ b/src/pair.cpp @@ -89,8 +89,14 @@ Pair::Pair(LAMMPS *lmp) : Pointers(lmp) eatom = NULL; vatom = NULL; + // CUDA and KOKKOS per-pair data masks + datamask = ALL_MASK; datamask_ext = ALL_MASK; + + execution_space = Host; + datamask_read = ALL_MASK; + datamask_modify = ALL_MASK; } /* ---------------------------------------------------------------------- */ diff --git 
a/src/pair.h b/src/pair.h index 96b63d4f72..9f6c21019c 100644 --- a/src/pair.h +++ b/src/pair.h @@ -97,6 +97,11 @@ class Pair : protected Pointers { int compute_flag; // 0 if skip compute() + // KOKKOS host/device flag and data masks + + ExecutionSpace execution_space; + unsigned int datamask_read,datamask_modify; + Pair(class LAMMPS *); virtual ~Pair(); diff --git a/src/pair_lj_cut.h b/src/pair_lj_cut.h index cccd9d1c69..6ecd15c30c 100644 --- a/src/pair_lj_cut.h +++ b/src/pair_lj_cut.h @@ -54,7 +54,7 @@ class PairLJCut : public Pair { double **lj1,**lj2,**lj3,**lj4,**offset; double *cut_respa; - void allocate(); + virtual void allocate(); }; } diff --git a/src/update.cpp b/src/update.cpp index ab18259771..41693276f5 100644 --- a/src/update.cpp +++ b/src/update.cpp @@ -60,13 +60,8 @@ Update::Update(LAMMPS *lmp) : Pointers(lmp) minimize_style = NULL; minimize = NULL; - if (lmp->cuda) { - str = (char *) "verlet/cuda"; - create_integrate(1,&str,NULL); - } else { - str = (char *) "verlet"; - create_integrate(1,&str,NULL); - } + str = (char *) "verlet"; + create_integrate(1,&str,lmp->suffix); str = (char *) "cg"; create_minimize(1,&str);