diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 0eea611d20..811164ff37 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -155,19 +155,20 @@ if (test $1 = 1) then sed -i -e 's/[^ \t]*KOKKOS[^ \t]* //g' ../Makefile.package sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_KOKKOS |' ../Makefile.package # sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package - sed -i -e 's|^PKG_LIB =[ \t]*|&-lkokkoscore |' ../Makefile.package - sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(KOKKOS_INC) |' ../Makefile.package - sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(KOKKOS_LINK) |' ../Makefile.package + sed -i -e 's|^PKG_CPP_DEPENDS =[ \t]*|&$(KOKKOS_CPP_DEPENDS) |' ../Makefile.package + sed -i -e 's|^PKG_LIB =[ \t]*|&$(KOKKOS_LIBS) |' ../Makefile.package + sed -i -e 's|^PKG_LINK_DEPENDS =[ \t]*|&$(KOKKOS_LINK_DEPENDS) |' ../Makefile.package + sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) |' ../Makefile.package + sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(KOKKOS_LDFLAGS) |' ../Makefile.package # sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package fi if (test -e ../Makefile.package.settings) then + sed -i -e '/CXX\ =\ \$(CC)/d' ../Makefile.package.settings sed -i -e '/^include.*kokkos.*$/d' ../Makefile.package.settings # multiline form needed for BSD sed on Macs - sed -i -e '4 i \ -include ..\/..\/lib\/kokkos\/Makefile.lammps -' ../Makefile.package.settings - + sed -i -e '4 i \CXX = $(CC)' ../Makefile.package.settings + sed -i -e '5 i \include ..\/..\/lib\/kokkos\/Makefile.kokkos' ../Makefile.package.settings fi elif (test $1 = 0) then @@ -178,6 +179,7 @@ elif (test $1 = 0) then fi if (test -e ../Makefile.package.settings) then + sed -i -e '/CXX\ =\ \$(CC)/d' ../Makefile.package.settings sed -i -e '/^include.*kokkos.*$/d' ../Makefile.package.settings fi diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 123bbd1a8c..1f9087c3cc 100644 --- a/src/KOKKOS/kokkos_type.h +++ 
b/src/KOKKOS/kokkos_type.h @@ -22,6 +22,68 @@ #define MAX_TYPES_STACKPARAMS 12 #define NeighClusterSize 8 + struct lmp_float3 { + float x,y,z; + KOKKOS_INLINE_FUNCTION + lmp_float3():x(0.0f),y(0.0f),z(0.0f) {} + + KOKKOS_INLINE_FUNCTION + void operator += (const lmp_float3& tmp) { + x+=tmp.x; + y+=tmp.y; + z+=tmp.z; + } + KOKKOS_INLINE_FUNCTION + void operator += (const lmp_float3& tmp) volatile { + x+=tmp.x; + y+=tmp.y; + z+=tmp.z; + } + KOKKOS_INLINE_FUNCTION + void operator = (const lmp_float3& tmp) { + x=tmp.x; + y=tmp.y; + z=tmp.z; + } + KOKKOS_INLINE_FUNCTION + void operator = (const lmp_float3& tmp) volatile { + x=tmp.x; + y=tmp.y; + z=tmp.z; + } + }; + + struct lmp_double3 { + double x,y,z; + KOKKOS_INLINE_FUNCTION + lmp_double3():x(0.0),y(0.0),z(0.0) {} + + KOKKOS_INLINE_FUNCTION + void operator += (const lmp_double3& tmp) { + x+=tmp.x; + y+=tmp.y; + z+=tmp.z; + } + KOKKOS_INLINE_FUNCTION + void operator += (const lmp_double3& tmp) volatile { + x+=tmp.x; + y+=tmp.y; + z+=tmp.z; + } + KOKKOS_INLINE_FUNCTION + void operator = (const lmp_double3& tmp) { + x=tmp.x; + y=tmp.y; + z=tmp.z; + } + KOKKOS_INLINE_FUNCTION + void operator = (const lmp_double3& tmp) volatile { + x=tmp.x; + y=tmp.y; + z=tmp.z; + } + }; + #if !defined(__CUDACC__) && !defined(__VECTOR_TYPES_H__) struct double2 { double x, y; @@ -29,14 +91,13 @@ struct float2 { float x, y; }; - struct double4 { - double x, y, z, w; - }; struct float4 { float x, y, z, w; }; + struct double4 { + double x, y, z, w; + }; #endif - // set LMPHostype and LMPDeviceType from Kokkos Default Types typedef Kokkos::DefaultExecutionSpace LMPDeviceType; typedef Kokkos::HostSpace::execution_space LMPHostType; @@ -66,10 +127,12 @@ struct ExecutionSpaceFromDevice { #if PRECISION==1 typedef float LMP_FLOAT; typedef float2 LMP_FLOAT2; +typedef lmp_float3 LMP_FLOAT3; typedef float4 LMP_FLOAT4; #else typedef double LMP_FLOAT; typedef double2 LMP_FLOAT2; +typedef lmp_double3 LMP_FLOAT3; typedef double4 LMP_FLOAT4; #endif @@ 
-80,10 +143,12 @@ typedef double4 LMP_FLOAT4; #if PREC_FORCE==1 typedef float F_FLOAT; typedef float2 F_FLOAT2; +typedef lmp_float3 F_FLOAT3; typedef float4 F_FLOAT4; #else typedef double F_FLOAT; typedef double2 F_FLOAT2; +typedef lmp_double3 F_FLOAT3; typedef double4 F_FLOAT4; #endif @@ -664,7 +729,7 @@ void buffer_view(BufferView &buf, DualView &view, template struct MemsetZeroFunctor { - typedef DeviceType device_type ; + typedef DeviceType execution_space ; void* ptr; KOKKOS_INLINE_FUNCTION void operator()(const int i) const { ((int*)ptr)[i] = 0; @@ -673,10 +738,10 @@ struct MemsetZeroFunctor { template void memset_kokkos (ViewType &view) { - static MemsetZeroFunctor f; + static MemsetZeroFunctor f; f.ptr = view.ptr_on_device(); Kokkos::parallel_for(view.capacity()*sizeof(typename ViewType::value_type)/4, f); - ViewType::device_type::fence(); + ViewType::execution_space::fence(); } diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index f3bef77b86..5f9b347ddf 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -336,7 +336,6 @@ struct PairComputeFunctor { template struct PairComputeFunctor { typedef typename PairStyle::device_type device_type ; - typedef Kokkos::Vectorization vectorization; typedef EV_FLOAT value_type; PairStyle c; @@ -356,7 +355,7 @@ struct PairComputeFunctor { EV_FLOAT compute_item(const typename Kokkos::TeamPolicy::member_type& dev, const NeighListKokkos &list, const NoCoulTag& ) const { EV_FLOAT ev; - const int i = vectorization::global_thread_rank(dev); + const int i = dev.league_rank()*dev.team_size() + dev.team_rank(); const X_FLOAT xtmp = c.c_x(i,0); const X_FLOAT ytmp = c.c_x(i,1); @@ -366,17 +365,15 @@ struct PairComputeFunctor { const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); const int jnum = list.d_numneigh[i]; - F_FLOAT fxtmp = 0.0; - F_FLOAT fytmp = 0.0; - F_FLOAT fztmp = 0.0; + F_FLOAT3 ftmp; for (int jj = 0; jj < jnum; jj++) { const int jjj = neighbors_i(jj); - for (int k = 
vectorization::begin(); k=c.nall)) continue; + if((j==i)||(j>=c.nall)) return; const X_FLOAT delx = xtmp - c.c_x(j,0); const X_FLOAT dely = ytmp - c.c_x(j,1); const X_FLOAT delz = ztmp - c.c_x(j,2); @@ -386,9 +383,9 @@ struct PairComputeFunctor { if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { const F_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); - fxtmp += delx*fpair; - fytmp += dely*fpair; - fztmp += delz*fpair; + fftmp.x += delx*fpair; + fftmp.y += dely*fpair; + fftmp.z += delz*fpair; if (EVFLAG) { F_FLOAT evdwl = 0.0; @@ -401,17 +398,14 @@ struct PairComputeFunctor { if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); } } - } + },ftmp); } - const F_FLOAT fx = vectorization::reduce(fxtmp); - const F_FLOAT fy = vectorization::reduce(fytmp); - const F_FLOAT fz = vectorization::reduce(fztmp); - if(vectorization::is_lane_0(dev)) { - c.f(i,0) += fx; - c.f(i,1) += fy; - c.f(i,2) += fz; - } + Kokkos::single(Kokkos::PerThread(dev), [&]() { + c.f(i,0) += ftmp.x; + c.f(i,1) += ftmp.y; + c.f(i,2) += ftmp.z; + }); return ev; } @@ -659,12 +653,12 @@ EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enab f_type; f_type ff(fpair, list); #ifdef KOKKOS_HAVE_CUDA - const int teamsize = Kokkos::Impl::is_same::value ? 256 : 1; + const int teamsize = Kokkos::Impl::is_same::value ? 
32 : 1; #else const int teamsize = 1; #endif - const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize; - Kokkos::TeamPolicy config(nteams,teamsize); + const int nteams = (list->inum+teamsize-1)/teamsize; + Kokkos::TeamPolicy config(nteams,teamsize,NeighClusterSize); if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev); else Kokkos::parallel_for(config,ff); } else { @@ -672,12 +666,12 @@ EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enab f_type; f_type ff(fpair, list); #ifdef KOKKOS_HAVE_CUDA - const int teamsize = Kokkos::Impl::is_same::value ? 256 : 1; + const int teamsize = Kokkos::Impl::is_same::value ? 32 : 1; #else const int teamsize = 1; #endif - const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize; - Kokkos::TeamPolicy config(nteams,teamsize); + const int nteams = (list->inum+teamsize-1)/teamsize; + Kokkos::TeamPolicy config(nteams,teamsize,NeighClusterSize); if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev); else Kokkos::parallel_for(config,ff); } @@ -706,12 +700,12 @@ template struct PairVirialFDotRCompute { typedef ArrayTypes AT; typedef EV_FLOAT value_type; - typename AT::t_x_array_const x; - typename AT::t_f_array_const f; + typename AT::t_x_array_const_um x; + typename AT::t_f_array_const_um f; const int offset; - PairVirialFDotRCompute( typename AT::t_x_array_const x_, - typename AT::t_f_array_const f_, + PairVirialFDotRCompute( typename AT::t_x_array_const_um x_, + typename AT::t_f_array_const_um f_, const int offset_):x(x_),f(f_),offset(offset_) {} KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp index dfd6787c95..f0c6068bbe 100644 --- a/src/KOKKOS/pair_table_kokkos.cpp +++ b/src/KOKKOS/pair_table_kokkos.cpp @@ -147,12 +147,12 @@ void PairTableKokkos::compute_style(int eflag_in, int vflag_in) f_type; f_type f(this,(NeighListKokkos*) list); #ifdef KOKKOS_HAVE_CUDA 
- const int teamsize = Kokkos::Impl::is_same::value ? 256 : 1; + const int teamsize = Kokkos::Impl::is_same::value ? 32 : 1; #else const int teamsize = 1; #endif - const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize; - Kokkos::TeamPolicy config(nteams,teamsize); + const int nteams = (list->inum+teamsize-1)/teamsize; + Kokkos::TeamPolicy config(nteams,teamsize,NeighClusterSize); if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev); else Kokkos::parallel_for(config,f); } @@ -182,12 +182,12 @@ void PairTableKokkos::compute_style(int eflag_in, int vflag_in) f_type; f_type f(this,(NeighListKokkos*) list); #ifdef KOKKOS_HAVE_CUDA - const int teamsize = Kokkos::Impl::is_same::value ? 256 : 1; + const int teamsize = Kokkos::Impl::is_same::value ? 32 : 1; #else const int teamsize = 1; #endif - const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize; - Kokkos::TeamPolicy config(nteams,teamsize); + const int nteams = (list->inum+teamsize-1)/teamsize; + Kokkos::TeamPolicy config(nteams,teamsize,NeighClusterSize); if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev); else Kokkos::parallel_for(config,f); } diff --git a/src/MAKE/Makefile.mpi b/src/MAKE/Makefile.mpi index 3d1766bbd0..9497fc7c92 100755 --- a/src/MAKE/Makefile.mpi +++ b/src/MAKE/Makefile.mpi @@ -74,7 +74,9 @@ include Makefile.package EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) +EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) +EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) # Path to src files @@ -83,28 +85,28 @@ vpath %.h .. 
# Link target -$(EXE): $(OBJ) +$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS) $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library targets -lib: $(OBJ) +lib: $(OBJ) $(EXTRA_LINK_DEPENDS) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) -shlib: $(OBJ) +shlib: $(OBJ) $(EXTRA_LINK_DEPENDS) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ $(OBJ) $(EXTRA_LIB) $(LIB) # Compilation rules -%.o:%.cpp +%.o:%.cpp $(EXTRA_CPP_DEPENDS) $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< -%.d:%.cpp +%.d:%.cpp $(EXTRA_CPP_DEPENDS) $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ -%.o:%.cu +%.o:%.cu $(EXTRA_CPP_DEPENDS) $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< # Individual dependencies diff --git a/src/MAKE/Makefile.serial b/src/MAKE/Makefile.serial index 767624c6de..efc5fed755 100755 --- a/src/MAKE/Makefile.serial +++ b/src/MAKE/Makefile.serial @@ -74,7 +74,9 @@ include Makefile.package EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC) EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH) +EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS) EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) +EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS) # Path to src files @@ -83,28 +85,28 @@ vpath %.h .. 
# Link target -$(EXE): $(OBJ) +$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS) $(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE) $(SIZE) $(EXE) # Library targets -lib: $(OBJ) +lib: $(OBJ) $(EXTRA_LINK_DEPENDS) $(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ) -shlib: $(OBJ) +shlib: $(OBJ) $(EXTRA_LINK_DEPENDS) $(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \ $(OBJ) $(EXTRA_LIB) $(LIB) # Compilation rules -%.o:%.cpp +%.o:%.cpp $(EXTRA_CPP_DEPENDS) $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< -%.d:%.cpp +%.d:%.cpp $(EXTRA_CPP_DEPENDS) $(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@ -%.o:%.cu +%.o:%.cu $(EXTRA_CPP_DEPENDS) $(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $< # Individual dependencies diff --git a/src/Makefile.package.empty b/src/Makefile.package.empty index 0d8e6c175f..d421877e2d 100644 --- a/src/Makefile.package.empty +++ b/src/Makefile.package.empty @@ -4,6 +4,8 @@ PKG_INC = PKG_PATH = PKG_LIB = +PKG_CPP_DEPENDS = +PKG_LINK_DEPENDS = PKG_SYSINC = PKG_SYSLIB =