Updating Kokkos lib
This commit is contained in:
@ -476,54 +476,54 @@ namespace Kokkos {
|
||||
};
|
||||
|
||||
template<class Generator>
|
||||
struct rand<Generator, ::Kokkos::complex<float> > {
|
||||
struct rand<Generator, Kokkos::complex<float> > {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<float> max () {
|
||||
return ::Kokkos::complex<float> (1.0, 1.0);
|
||||
static Kokkos::complex<float> max () {
|
||||
return Kokkos::complex<float> (1.0, 1.0);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<float> draw (Generator& gen) {
|
||||
static Kokkos::complex<float> draw (Generator& gen) {
|
||||
const float re = gen.frand ();
|
||||
const float im = gen.frand ();
|
||||
return ::Kokkos::complex<float> (re, im);
|
||||
return Kokkos::complex<float> (re, im);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& range) {
|
||||
static Kokkos::complex<float> draw (Generator& gen, const Kokkos::complex<float>& range) {
|
||||
const float re = gen.frand (real (range));
|
||||
const float im = gen.frand (imag (range));
|
||||
return ::Kokkos::complex<float> (re, im);
|
||||
return Kokkos::complex<float> (re, im);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& start, const ::Kokkos::complex<float>& end) {
|
||||
static Kokkos::complex<float> draw (Generator& gen, const Kokkos::complex<float>& start, const Kokkos::complex<float>& end) {
|
||||
const float re = gen.frand (real (start), real (end));
|
||||
const float im = gen.frand (imag (start), imag (end));
|
||||
return ::Kokkos::complex<float> (re, im);
|
||||
return Kokkos::complex<float> (re, im);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Generator>
|
||||
struct rand<Generator, ::Kokkos::complex<double> > {
|
||||
struct rand<Generator, Kokkos::complex<double> > {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<double> max () {
|
||||
return ::Kokkos::complex<double> (1.0, 1.0);
|
||||
static Kokkos::complex<double> max () {
|
||||
return Kokkos::complex<double> (1.0, 1.0);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<double> draw (Generator& gen) {
|
||||
static Kokkos::complex<double> draw (Generator& gen) {
|
||||
const double re = gen.drand ();
|
||||
const double im = gen.drand ();
|
||||
return ::Kokkos::complex<double> (re, im);
|
||||
return Kokkos::complex<double> (re, im);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& range) {
|
||||
static Kokkos::complex<double> draw (Generator& gen, const Kokkos::complex<double>& range) {
|
||||
const double re = gen.drand (real (range));
|
||||
const double im = gen.drand (imag (range));
|
||||
return ::Kokkos::complex<double> (re, im);
|
||||
return Kokkos::complex<double> (re, im);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& start, const ::Kokkos::complex<double>& end) {
|
||||
static Kokkos::complex<double> draw (Generator& gen, const Kokkos::complex<double>& start, const Kokkos::complex<double>& end) {
|
||||
const double re = gen.drand (real (start), real (end));
|
||||
const double im = gen.drand (imag (start), imag (end));
|
||||
return ::Kokkos::complex<double> (re, im);
|
||||
return Kokkos::complex<double> (re, im);
|
||||
}
|
||||
};
|
||||
|
||||
@ -670,8 +670,8 @@ namespace Kokkos {
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while(S>=1.0) {
|
||||
U = drand();
|
||||
const double V = drand();
|
||||
U = 2.0*drand() - 1.0;
|
||||
const double V = 2.0*drand() - 1.0;
|
||||
S = U*U+V*V;
|
||||
}
|
||||
return U*sqrt(-2.0*log(S)/S);
|
||||
@ -910,8 +910,8 @@ namespace Kokkos {
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while(S>=1.0) {
|
||||
U = drand();
|
||||
const double V = drand();
|
||||
U = 2.0*drand() - 1.0;
|
||||
const double V = 2.0*drand() - 1.0;
|
||||
S = U*U+V*V;
|
||||
}
|
||||
return U*sqrt(-2.0*log(S)/S);
|
||||
@ -1163,8 +1163,8 @@ namespace Kokkos {
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while(S>=1.0) {
|
||||
U = drand();
|
||||
const double V = drand();
|
||||
U = 2.0*drand() - 1.0;
|
||||
const double V = 2.0*drand() - 1.0;
|
||||
S = U*U+V*V;
|
||||
}
|
||||
return U*sqrt(-2.0*log(S)/S);
|
||||
|
||||
@ -51,7 +51,7 @@
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
namespace SortImpl {
|
||||
namespace Impl {
|
||||
|
||||
template<class ValuesViewType, int Rank=ValuesViewType::Rank>
|
||||
struct CopyOp;
|
||||
@ -199,7 +199,7 @@ public:
|
||||
|
||||
parallel_for(values.dimension_0(),
|
||||
bin_sort_sort_functor<ValuesViewType, offset_type,
|
||||
SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
|
||||
Impl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
|
||||
|
||||
deep_copy(values,sorted_values);
|
||||
}
|
||||
@ -262,17 +262,15 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
namespace SortImpl {
|
||||
|
||||
template<class KeyViewType>
|
||||
struct DefaultBinOp1D {
|
||||
struct BinOp1D {
|
||||
const int max_bins_;
|
||||
const double mul_;
|
||||
typename KeyViewType::const_value_type range_;
|
||||
typename KeyViewType::const_value_type min_;
|
||||
|
||||
//Construct BinOp with number of bins, minimum value and maximum value
|
||||
DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
|
||||
BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
|
||||
typename KeyViewType::const_value_type max )
|
||||
:max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {}
|
||||
|
||||
@ -298,13 +296,13 @@ struct DefaultBinOp1D {
|
||||
};
|
||||
|
||||
template<class KeyViewType>
|
||||
struct DefaultBinOp3D {
|
||||
struct BinOp3D {
|
||||
int max_bins_[3];
|
||||
double mul_[3];
|
||||
typename KeyViewType::non_const_value_type range_[3];
|
||||
typename KeyViewType::non_const_value_type min_[3];
|
||||
|
||||
DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
|
||||
BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
|
||||
typename KeyViewType::const_value_type max[] )
|
||||
{
|
||||
max_bins_[0] = max_bins__[0]+1;
|
||||
@ -348,109 +346,11 @@ struct DefaultBinOp3D {
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct min_max {
|
||||
Scalar min;
|
||||
Scalar max;
|
||||
bool init;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
min_max() {
|
||||
min = 0;
|
||||
max = 0;
|
||||
init = 0;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
min_max (const min_max& val) {
|
||||
min = val.min;
|
||||
max = val.max;
|
||||
init = val.init;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
min_max operator = (const min_max& val) {
|
||||
min = val.min;
|
||||
max = val.max;
|
||||
init = val.init;
|
||||
return *this;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator+= (const Scalar& val) {
|
||||
if(init) {
|
||||
min = min<val?min:val;
|
||||
max = max>val?max:val;
|
||||
} else {
|
||||
min = val;
|
||||
max = val;
|
||||
init = 1;
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator+= (const min_max& val) {
|
||||
if(init && val.init) {
|
||||
min = min<val.min?min:val.min;
|
||||
max = max>val.max?max:val.max;
|
||||
} else {
|
||||
if(val.init) {
|
||||
min = val.min;
|
||||
max = val.max;
|
||||
init = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator+= (volatile const Scalar& val) volatile {
|
||||
if(init) {
|
||||
min = min<val?min:val;
|
||||
max = max>val?max:val;
|
||||
} else {
|
||||
min = val;
|
||||
max = val;
|
||||
init = 1;
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator+= (volatile const min_max& val) volatile {
|
||||
if(init && val.init) {
|
||||
min = min<val.min?min:val.min;
|
||||
max = max>val.max?max:val.max;
|
||||
} else {
|
||||
if(val.init) {
|
||||
min = val.min;
|
||||
max = val.max;
|
||||
init = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class ViewType>
|
||||
struct min_max_functor {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
ViewType view;
|
||||
typedef min_max<typename ViewType::non_const_value_type> value_type;
|
||||
min_max_functor (const ViewType view_):view(view_) {
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const size_t& i, value_type& val) const {
|
||||
val += view(i);
|
||||
}
|
||||
};
|
||||
namespace Impl {
|
||||
|
||||
template<class ViewType>
|
||||
bool try_std_sort(ViewType view) {
|
||||
bool possible = true;
|
||||
#if ! KOKKOS_USING_EXP_VIEW
|
||||
size_t stride[8];
|
||||
view.stride(stride);
|
||||
#else
|
||||
size_t stride[8] = { view.stride_0()
|
||||
, view.stride_1()
|
||||
, view.stride_2()
|
||||
@ -460,8 +360,7 @@ bool try_std_sort(ViewType view) {
|
||||
, view.stride_6()
|
||||
, view.stride_7()
|
||||
};
|
||||
#endif
|
||||
possible = possible && Impl::is_same<typename ViewType::memory_space, HostSpace>::value;
|
||||
possible = possible && std::is_same<typename ViewType::memory_space, HostSpace>::value;
|
||||
possible = possible && (ViewType::Rank == 1);
|
||||
possible = possible && (stride[0] == 1);
|
||||
if(possible) {
|
||||
@ -470,27 +369,39 @@ bool try_std_sort(ViewType view) {
|
||||
return possible;
|
||||
}
|
||||
|
||||
template<class ViewType>
|
||||
struct min_max_functor {
|
||||
typedef Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> minmax_scalar;
|
||||
|
||||
ViewType view;
|
||||
min_max_functor(const ViewType& view_):view(view_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const size_t& i, minmax_scalar& minmax) const {
|
||||
if(view(i) < minmax.min_val) minmax.min_val = view(i);
|
||||
if(view(i) > minmax.max_val) minmax.max_val = view(i);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
template<class ViewType>
|
||||
void sort(ViewType view, bool always_use_kokkos_sort = false) {
|
||||
if(!always_use_kokkos_sort) {
|
||||
if(SortImpl::try_std_sort(view)) return;
|
||||
if(Impl::try_std_sort(view)) return;
|
||||
}
|
||||
typedef BinOp1D<ViewType> CompType;
|
||||
|
||||
typedef SortImpl::DefaultBinOp1D<ViewType> CompType;
|
||||
SortImpl::min_max<typename ViewType::non_const_value_type> val;
|
||||
parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val);
|
||||
BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true);
|
||||
Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
|
||||
Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
|
||||
parallel_reduce(Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.dimension_0()),
|
||||
Impl::min_max_functor<ViewType>(view),reducer);
|
||||
if(result.min_val == result.max_val) return;
|
||||
BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,result.min_val,result.max_val),true);
|
||||
bin_sort.create_permute_vector();
|
||||
bin_sort.sort(view);
|
||||
}
|
||||
|
||||
/*template<class ViewType, class Comparator>
|
||||
void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) {
|
||||
|
||||
}*/
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
|
||||
|
||||
SET(SOURCES
|
||||
|
||||
@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests
|
||||
default: build_all
|
||||
echo "End Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
|
||||
else
|
||||
CXX = g++
|
||||
endif
|
||||
|
||||
CXXFLAGS = -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
CXX = $(NVCC_WRAPPER)
|
||||
CXXFLAGS ?= -O3
|
||||
LINK = $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
else
|
||||
CXX ?= g++
|
||||
CXXFLAGS ?= -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
endif
|
||||
|
||||
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests
|
||||
|
||||
TEST_TARGETS =
|
||||
|
||||
@ -131,6 +131,10 @@ void test_1D_sort(unsigned int n,bool force_kokkos) {
|
||||
typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
|
||||
KeyViewType keys("Keys",n);
|
||||
|
||||
// Test sorting array with all numbers equal
|
||||
Kokkos::deep_copy(keys,KeyType(1));
|
||||
Kokkos::sort(keys,force_kokkos);
|
||||
|
||||
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
|
||||
Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);
|
||||
|
||||
@ -174,7 +178,7 @@ void test_3D_sort(unsigned int n) {
|
||||
typename KeyViewType::value_type min[3] = {0,0,0};
|
||||
typename KeyViewType::value_type max[3] = {100,100,100};
|
||||
|
||||
typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp;
|
||||
typedef Kokkos::BinOp3D< KeyViewType > BinOp;
|
||||
BinOp bin_op(bin_max,min,max);
|
||||
Kokkos::BinSort< KeyViewType , BinOp >
|
||||
Sorter(keys,bin_op,false);
|
||||
|
||||
Reference in New Issue
Block a user