Update Kokkos library to v2.7.00
This commit is contained in:
@ -130,30 +130,32 @@ struct SomeCorrelation {
|
||||
int main(int narg, char* args[]) {
|
||||
Kokkos::initialize(narg,args);
|
||||
|
||||
// Produce some 3D random data (see Algorithms/01_random_numbers for more info)
|
||||
Kokkos::View<int***,Kokkos::LayoutRight> data("Data",512,512,32);
|
||||
Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857);
|
||||
Kokkos::fill_random(data,rand_pool64,100);
|
||||
{
|
||||
// Produce some 3D random data (see Algorithms/01_random_numbers for more info)
|
||||
Kokkos::View<int***,Kokkos::LayoutRight> data("Data",512,512,32);
|
||||
Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857);
|
||||
Kokkos::fill_random(data,rand_pool64,100);
|
||||
|
||||
// A global value to put the result in
|
||||
Kokkos::View<int> gsum("Sum");
|
||||
// A global value to put the result in
|
||||
Kokkos::View<int> gsum("Sum");
|
||||
|
||||
// Each team handles a slice of the data
|
||||
// Set up TeamPolicy with 512 teams with maximum number of threads per team and 16 vector lanes.
|
||||
// Kokkos::AUTO will determine the number of threads
|
||||
// The maximum vector length is hardware dependent but can always be smaller than the hardware allows.
|
||||
// The vector length must be a power of 2.
|
||||
// Each team handles a slice of the data
|
||||
// Set up TeamPolicy with 512 teams with maximum number of threads per team and 16 vector lanes.
|
||||
// Kokkos::AUTO will determine the number of threads
|
||||
// The maximum vector length is hardware dependent but can always be smaller than the hardware allows.
|
||||
// The vector length must be a power of 2.
|
||||
|
||||
const Kokkos::TeamPolicy<> policy( 512 , Kokkos::AUTO , 16);
|
||||
const Kokkos::TeamPolicy<> policy( 512 , Kokkos::AUTO , 16);
|
||||
|
||||
Kokkos::parallel_for( policy , SomeCorrelation(data,gsum) );
|
||||
Kokkos::parallel_for( policy , SomeCorrelation(data,gsum) );
|
||||
|
||||
Kokkos::fence();
|
||||
Kokkos::fence();
|
||||
|
||||
// Copy result value back
|
||||
int sum = 0;
|
||||
Kokkos::deep_copy(sum,gsum);
|
||||
printf("Result %i\n",sum);
|
||||
// Copy result value back
|
||||
int sum = 0;
|
||||
Kokkos::deep_copy(sum,gsum);
|
||||
printf("Result %i\n",sum);
|
||||
}
|
||||
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
@ -104,14 +104,15 @@ struct find_2_tuples {
|
||||
|
||||
int main(int narg, char* args[]) {
|
||||
Kokkos::initialize(narg,args);
|
||||
|
||||
|
||||
{
|
||||
int chunk_size = 1024;
|
||||
int nchunks = 100000; //1024*1024;
|
||||
Kokkos::DualView<int*> data("data",nchunks*chunk_size+1);
|
||||
|
||||
srand(1231093);
|
||||
|
||||
for(int i = 0; i < (int) data.dimension_0(); i++) {
|
||||
for(int i = 0; i < (int) data.extent(0); i++) {
|
||||
data.h_view(i) = rand()%TEAM_SIZE;
|
||||
}
|
||||
data.modify<Host>();
|
||||
@ -122,7 +123,10 @@ int main(int narg, char* args[]) {
|
||||
|
||||
Kokkos::Timer timer;
|
||||
// threads/team is automatically limited to maximum supported by the device.
|
||||
Kokkos::parallel_for( team_policy( nchunks , TEAM_SIZE )
|
||||
int team_size = TEAM_SIZE;
|
||||
if( team_size > Device::execution_space::concurrency() )
|
||||
team_size = Device::execution_space::concurrency();
|
||||
Kokkos::parallel_for( team_policy( nchunks , team_size )
|
||||
, find_2_tuples(chunk_size,data,histogram) );
|
||||
Kokkos::fence();
|
||||
double time = timer.seconds();
|
||||
@ -139,6 +143,7 @@ int main(int narg, char* args[]) {
|
||||
printf("\n");
|
||||
}
|
||||
printf("Result: %i %i\n",sum,chunk_size*nchunks);
|
||||
}
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user