git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@13579 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2015-07-14 17:39:05 +00:00
parent 4e6e4da383
commit ca4fa347b3
7 changed files with 128 additions and 61 deletions

View File

@ -155,19 +155,20 @@ if (test $1 = 1) then
sed -i -e 's/[^ \t]*KOKKOS[^ \t]* //g' ../Makefile.package
sed -i -e 's|^PKG_INC =[ \t]*|&-DLMP_KOKKOS |' ../Makefile.package
# sed -i -e 's|^PKG_PATH =[ \t]*|&-L..\/..\/lib\/kokkos\/core\/src |' ../Makefile.package
sed -i -e 's|^PKG_LIB =[ \t]*|&-lkokkoscore |' ../Makefile.package
sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(KOKKOS_INC) |' ../Makefile.package
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(KOKKOS_LINK) |' ../Makefile.package
sed -i -e 's|^PKG_CPP_DEPENDS =[ \t]*|&$(KOKKOS_CPP_DEPENDS) |' ../Makefile.package
sed -i -e 's|^PKG_LIB =[ \t]*|&$(KOKKOS_LIBS) |' ../Makefile.package
sed -i -e 's|^PKG_LINK_DEPENDS =[ \t]*|&$(KOKKOS_LINK_DEPENDS) |' ../Makefile.package
sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) |' ../Makefile.package
sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(KOKKOS_LDFLAGS) |' ../Makefile.package
# sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(kokkos_SYSPATH) |' ../Makefile.package
fi
if (test -e ../Makefile.package.settings) then
sed -i -e '/CXX\ =\ \$(CC)/d' ../Makefile.package.settings
sed -i -e '/^include.*kokkos.*$/d' ../Makefile.package.settings
# multiline form needed for BSD sed on Macs
sed -i -e '4 i \
include ..\/..\/lib\/kokkos\/Makefile.lammps
' ../Makefile.package.settings
sed -i -e '4 i \CXX = $(CC)' ../Makefile.package.settings
sed -i -e '5 i \include ..\/..\/lib\/kokkos\/Makefile.kokkos' ../Makefile.package.settings
fi
elif (test $1 = 0) then
@ -178,6 +179,7 @@ elif (test $1 = 0) then
fi
if (test -e ../Makefile.package.settings) then
sed -i -e '/CXX\ =\ \$(CC)/d' ../Makefile.package.settings
sed -i -e '/^include.*kokkos.*$/d' ../Makefile.package.settings
fi

View File

@ -22,6 +22,68 @@
#define MAX_TYPES_STACKPARAMS 12
#define NeighClusterSize 8
// Three-component single-precision vector.  Aliased as LMP_FLOAT3 / F_FLOAT3
// when the corresponding precision macro selects float.
// Each operator is provided in a plain and a volatile overload
// (NOTE(review): the volatile forms are presumably needed by Kokkos
// reductions -- confirm against the Kokkos version in use).
struct lmp_float3 {
  float x,y,z;

  // Zero-initialize all components.  Initializers are listed in member
  // declaration order (x, y, z), which is the order they are executed in.
  KOKKOS_INLINE_FUNCTION
  lmp_float3():x(0.0f),y(0.0f),z(0.0f) {}

  // Component-wise accumulate.
  KOKKOS_INLINE_FUNCTION
  void operator += (const lmp_float3& tmp) {
    x+=tmp.x;
    y+=tmp.y;
    z+=tmp.z;
  }

  // Component-wise accumulate into a volatile object.
  KOKKOS_INLINE_FUNCTION
  void operator += (const lmp_float3& tmp) volatile {
    x+=tmp.x;
    y+=tmp.y;
    z+=tmp.z;
  }

  // Component-wise copy.
  KOKKOS_INLINE_FUNCTION
  void operator = (const lmp_float3& tmp) {
    x=tmp.x;
    y=tmp.y;
    z=tmp.z;
  }

  // Component-wise copy into a volatile object.
  KOKKOS_INLINE_FUNCTION
  void operator = (const lmp_float3& tmp) volatile {
    x=tmp.x;
    y=tmp.y;
    z=tmp.z;
  }
};
// Three-component double-precision vector.  Aliased as LMP_FLOAT3 / F_FLOAT3
// when the corresponding precision macro does not select float.
// Each operator is provided in a plain and a volatile overload
// (NOTE(review): the volatile forms are presumably needed by Kokkos
// reductions -- confirm against the Kokkos version in use).
struct lmp_double3 {
  double x,y,z;

  // Zero-initialize all components.  Initializers are listed in member
  // declaration order (x, y, z), which is the order they are executed in.
  KOKKOS_INLINE_FUNCTION
  lmp_double3():x(0.0),y(0.0),z(0.0) {}

  // Component-wise accumulate.
  KOKKOS_INLINE_FUNCTION
  void operator += (const lmp_double3& tmp) {
    x+=tmp.x;
    y+=tmp.y;
    z+=tmp.z;
  }

  // Component-wise accumulate into a volatile object.
  KOKKOS_INLINE_FUNCTION
  void operator += (const lmp_double3& tmp) volatile {
    x+=tmp.x;
    y+=tmp.y;
    z+=tmp.z;
  }

  // Component-wise copy.
  KOKKOS_INLINE_FUNCTION
  void operator = (const lmp_double3& tmp) {
    x=tmp.x;
    y=tmp.y;
    z=tmp.z;
  }

  // Component-wise copy into a volatile object.
  KOKKOS_INLINE_FUNCTION
  void operator = (const lmp_double3& tmp) volatile {
    x=tmp.x;
    y=tmp.y;
    z=tmp.z;
  }
};
#if !defined(__CUDACC__) && !defined(__VECTOR_TYPES_H__)
struct double2 {
double x, y;
@ -29,14 +91,13 @@
struct float2 {
float x, y;
};
struct double4 {
double x, y, z, w;
};
struct float4 {
float x, y, z, w;
};
struct double4 {
double x, y, z, w;
};
#endif
// set LMPHostype and LMPDeviceType from Kokkos Default Types
typedef Kokkos::DefaultExecutionSpace LMPDeviceType;
typedef Kokkos::HostSpace::execution_space LMPHostType;
@ -66,10 +127,12 @@ struct ExecutionSpaceFromDevice<Kokkos::Cuda> {
// Select the LMP_FLOAT scalar and its 2/3/4-component vector aliases:
// float-based types when PRECISION is 1, double-based types otherwise.
#if PRECISION==1
typedef float LMP_FLOAT;
typedef float2 LMP_FLOAT2;
typedef lmp_float3 LMP_FLOAT3;
typedef float4 LMP_FLOAT4;
#else
typedef double LMP_FLOAT;
typedef double2 LMP_FLOAT2;
typedef lmp_double3 LMP_FLOAT3;
typedef double4 LMP_FLOAT4;
#endif
@ -80,10 +143,12 @@ typedef double4 LMP_FLOAT4;
// Select the F_FLOAT scalar and its 2/3/4-component vector aliases:
// float-based types when PREC_FORCE is 1, double-based types otherwise.
#if PREC_FORCE==1
typedef float F_FLOAT;
typedef float2 F_FLOAT2;
typedef lmp_float3 F_FLOAT3;
typedef float4 F_FLOAT4;
#else
typedef double F_FLOAT;
typedef double2 F_FLOAT2;
typedef lmp_double3 F_FLOAT3;
typedef double4 F_FLOAT4;
#endif
@ -664,7 +729,7 @@ void buffer_view(BufferView &buf, DualView &view,
template<class DeviceType>
struct MemsetZeroFunctor {
typedef DeviceType device_type ;
typedef DeviceType execution_space ;
void* ptr;
KOKKOS_INLINE_FUNCTION void operator()(const int i) const {
((int*)ptr)[i] = 0;
@ -673,10 +738,10 @@ struct MemsetZeroFunctor {
// Zero the storage behind a Kokkos view by launching a parallel_for over a
// MemsetZeroFunctor whose ptr is set to the view's device pointer.
template<class ViewType>
void memset_kokkos (ViewType &view) {
static MemsetZeroFunctor<typename ViewType::device_type> f;
static MemsetZeroFunctor<typename ViewType::execution_space> f;
f.ptr = view.ptr_on_device();
// Iteration count is the view's byte size divided by 4.
// NOTE(review): presumably 4 stands for sizeof(int), the unit the functor
// writes -- confirm; a byte size that is not a multiple of 4 would leave a
// tail unwritten.
Kokkos::parallel_for(view.capacity()*sizeof(typename ViewType::value_type)/4, f);
// Fence after the launch (presumably so the zeroing is complete on return).
ViewType::device_type::fence();
ViewType::execution_space::fence();
}

View File

@ -336,7 +336,6 @@ struct PairComputeFunctor {
template <class PairStyle, bool STACKPARAMS, class Specialisation>
struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
typedef typename PairStyle::device_type device_type ;
typedef Kokkos::Vectorization<device_type,NeighClusterSize> vectorization;
typedef EV_FLOAT value_type;
PairStyle c;
@ -356,7 +355,7 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
EV_FLOAT compute_item(const typename Kokkos::TeamPolicy<device_type>::member_type& dev,
const NeighListKokkos<device_type> &list, const NoCoulTag& ) const {
EV_FLOAT ev;
const int i = vectorization::global_thread_rank(dev);
const int i = dev.league_rank()*dev.team_size() + dev.team_rank();
const X_FLOAT xtmp = c.c_x(i,0);
const X_FLOAT ytmp = c.c_x(i,1);
@ -366,17 +365,15 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
F_FLOAT fxtmp = 0.0;
F_FLOAT fytmp = 0.0;
F_FLOAT fztmp = 0.0;
F_FLOAT3 ftmp;
for (int jj = 0; jj < jnum; jj++) {
const int jjj = neighbors_i(jj);
for (int k = vectorization::begin(); k<NeighClusterSize; k+=vectorization::increment) {
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(dev,NeighClusterSize),[&] (const int& k, F_FLOAT3& fftmp) {
const F_FLOAT factor_lj = c.special_lj[sbmask(jjj+k)];
const int j = (jjj + k)&NEIGHMASK;
if((j==i)||(j>=c.nall)) continue;
if((j==i)||(j>=c.nall)) return;
const X_FLOAT delx = xtmp - c.c_x(j,0);
const X_FLOAT dely = ytmp - c.c_x(j,1);
const X_FLOAT delz = ztmp - c.c_x(j,2);
@ -386,9 +383,9 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
fftmp.x += delx*fpair;
fftmp.y += dely*fpair;
fftmp.z += delz*fpair;
if (EVFLAG) {
F_FLOAT evdwl = 0.0;
@ -401,17 +398,14 @@ struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz);
}
}
}
},ftmp);
}
const F_FLOAT fx = vectorization::reduce(fxtmp);
const F_FLOAT fy = vectorization::reduce(fytmp);
const F_FLOAT fz = vectorization::reduce(fztmp);
if(vectorization::is_lane_0(dev)) {
c.f(i,0) += fx;
c.f(i,1) += fy;
c.f(i,2) += fz;
}
Kokkos::single(Kokkos::PerThread(dev), [&]() {
c.f(i,0) += ftmp.x;
c.f(i,1) += ftmp.y;
c.f(i,2) += ftmp.z;
});
return ev;
}
@ -659,12 +653,12 @@ EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enab
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize);
// Number of teams = ceil(inum / teamsize).  The previous `inum*+teamsize-1`
// parsed as inum*(+teamsize)-1 and yielded inum-1 teams.
const int nteams = (list->inum+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
else Kokkos::parallel_for(config,ff);
} else {
@ -672,12 +666,12 @@ EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enab
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize);
// Number of teams = ceil(inum / teamsize).  The previous `inum*+teamsize-1`
// parsed as inum*(+teamsize)-1 and yielded inum-1 teams.
const int nteams = (list->inum+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
else Kokkos::parallel_for(config,ff);
}
@ -706,12 +700,12 @@ template<class DeviceType>
struct PairVirialFDotRCompute {
typedef ArrayTypes<DeviceType> AT;
typedef EV_FLOAT value_type;
typename AT::t_x_array_const x;
typename AT::t_f_array_const f;
typename AT::t_x_array_const_um x;
typename AT::t_f_array_const_um f;
const int offset;
PairVirialFDotRCompute( typename AT::t_x_array_const x_,
typename AT::t_f_array_const f_,
PairVirialFDotRCompute( typename AT::t_x_array_const_um x_,
typename AT::t_f_array_const_um f_,
const int offset_):x(x_),f(f_),offset(offset_) {}
KOKKOS_INLINE_FUNCTION

View File

@ -147,12 +147,12 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
f_type;
f_type f(this,(NeighListKokkos<DeviceType>*) list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize);
// Number of teams = ceil(inum / teamsize).  The previous `inum*+teamsize-1`
// parsed as inum*(+teamsize)-1 and yielded inum-1 teams.
const int nteams = (list->inum+teamsize-1)/teamsize;
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
else Kokkos::parallel_for(config,f);
}
@ -182,12 +182,12 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
f_type;
f_type f(this,(NeighListKokkos<DeviceType>*) list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 256 : 1;
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*f_type::vectorization::increment+teamsize-1)/teamsize;
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize);
// Number of teams = ceil(inum / teamsize).  The previous `inum*+teamsize-1`
// parsed as inum*(+teamsize)-1 and yielded inum-1 teams.
const int nteams = (list->inum+teamsize-1)/teamsize;
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
else Kokkos::parallel_for(config,f);
}

View File

@ -74,7 +74,9 @@ include Makefile.package
# Aggregate the per-package and per-library settings into the EXTRA_*
# variables used by the link and compile rules in this makefile.
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
# Additional prerequisites for the %.o and %.d compilation rules.
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
# Additional prerequisites for the $(EXE), lib and shlib targets.
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
# Path to src files
@ -83,28 +85,28 @@ vpath %.h ..
# Link target
$(EXE): $(OBJ)
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ)
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ)
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp
%.o:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp
%.d:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
%.o:%.cu
%.o:%.cu $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies

View File

@ -74,7 +74,9 @@ include Makefile.package
# Aggregate the per-package and per-library settings into the EXTRA_*
# variables used by the link and compile rules in this makefile.
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
# Additional prerequisites for the %.o and %.d compilation rules.  The name
# must be EXTRA_CPP_DEPENDS, since that is what those rules reference.
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
# Additional prerequisites for the $(EXE), lib and shlib targets.
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
# Path to src files
@ -83,28 +85,28 @@ vpath %.h ..
# Link target
$(EXE): $(OBJ)
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ)
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ)
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp
%.o:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp
%.d:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
%.o:%.cu
%.o:%.cu $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies

View File

@ -4,6 +4,8 @@
PKG_INC =
PKG_PATH =
PKG_LIB =
PKG_CPP_DEPENDS =
PKG_LINK_DEPENDS =
PKG_SYSINC =
PKG_SYSLIB =