git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14370 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2015-12-15 22:26:37 +00:00
parent 31f22919ab
commit 06a217aa08
327 changed files with 0 additions and 95949 deletions

View File

@ -1,43 +0,0 @@
KOKKOS_PATH = ../../../..
SRC = $(wildcard *.cpp)
default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif
DEPFLAGS = -M
OBJ = $(SRC:.cpp=.o)
LIB =
include $(KOKKOS_PATH)/Makefile.kokkos
build: $(EXE)
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
clean: kokkos-clean
rm -f *.o *.cuda *.host
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,214 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <cstdio>
#include <cstdlib>
// DualView helps you manage data and computations that take place on
// two different memory spaces. Examples include CUDA device memory
// and (CPU) host memory (currently implemented), or Intel Knights
// Landing MCDRAM and DRAM (not yet implemented). For example, if you
// have ported only some parts of you application to run in CUDA,
// DualView can help manage moving data between the parts of your
// application that work best with CUDA, and the parts that work
// better on the CPU.
//
// A DualView takes the same template parameters as a View, but
// contains two Views: One that lives in the DualView's memory space,
// and one that lives in that memory space's host mirror space. If
// both memory spaces are the same, then the two Views just alias one
// another. This means that you can use DualView all the time, even
// when not running in a memory space like CUDA. DualView's
// operations to help you manage memory take almost no time in that
// case. This makes your code even more performance portable.
typedef Kokkos::DualView<double*> view_type;
typedef Kokkos::DualView<int**> idx_type;
template<class ExecutionSpace>
struct localsum {
// If the functor has a public 'execution_space' typedef, that defines
// the functor's execution space (where it runs in parallel). This
// overrides Kokkos' default execution space.
typedef ExecutionSpace execution_space;
typedef typename Kokkos::Impl::if_c<Kokkos::Impl::is_same<ExecutionSpace,Kokkos::DefaultExecutionSpace>::value ,
idx_type::memory_space, idx_type::host_mirror_space>::type memory_space;
// Get the view types on the particular device for which the functor
// is instantiated.
//
// "const_data_type" is a typedef in View (and DualView) which is
// the const version of the first template parameter of the View.
// For example, the const_data_type version of double** is const
// double**.
Kokkos::View<idx_type::const_data_type, idx_type::array_layout, memory_space> idx;
// "array_intrinsic_type" is a typedef in ViewTraits (and DualView) which is the
// array version of the value(s) stored in the View.
Kokkos::View<view_type::array_intrinsic_type, view_type::array_layout, memory_space> dest;
Kokkos::View<view_type::const_data_type, view_type::array_layout,
memory_space, Kokkos::MemoryRandomAccess> src;
// Constructor takes DualViews, synchronizes them to the device,
// then marks them as modified on the device.
localsum (idx_type dv_idx, view_type dv_dest, view_type dv_src)
{
// Extract the view on the correct Device (i.e., the correct
// memory space) from the DualView. DualView has a template
// method, view(), which is templated on the memory space. If the
// DualView has a View from that memory space, view() returns the
// View in that space.
idx = dv_idx.view<memory_space> ();
dest = dv_dest.template view<memory_space> ();
src = dv_src.template view<memory_space> ();
// Synchronize the DualView to the correct Device.
//
// DualView's sync() method is templated on a memory space, and
// synchronizes the DualView in a one-way fashion to that memory
// space. "Synchronizing" means copying, from the other memory
// space to the Device memory space. sync() does _nothing_ if the
// Views on the two memory spaces are in sync. DualView
// determines this by the user manually marking one side or the
// other as modified; see the modify() call below.
dv_idx.sync<memory_space> ();
dv_dest.template sync<memory_space> ();
dv_src.template sync<memory_space> ();
// Mark dest as modified on Device.
dv_dest.template modify<memory_space> ();
}
KOKKOS_INLINE_FUNCTION
void operator() (const int i) const {
double tmp = 0.0;
for (int j = 0; j < (int) idx.dimension_1(); ++j) {
const double val = src(idx(i,j));
tmp += val*val + 0.5*(idx.dimension_0()*val -idx.dimension_1()*val);
}
dest(i) += tmp;
}
};
class ParticleType {
public:
double q;
double m;
double q_over_m;
KOKKOS_INLINE_FUNCTION
ParticleType(double q_ = -1, double m_ = 1):
q(q_), m(m_), q_over_m(q/m) {}
protected:
};
typedef Kokkos::DualView<ParticleType[10]> ParticleTypes;
int main (int narg, char* arg[]) {
Kokkos::initialize (narg, arg);
ParticleTypes test("Test");
Kokkos::fence();
test.h_view(0) = ParticleType(-1e4,1);
Kokkos::fence();
int size = 1000000;
// Create DualViews. This will allocate on both the device and its
// host_mirror_device.
idx_type idx ("Idx",size,64);
view_type dest ("Dest",size);
view_type src ("Src",size);
srand (134231);
// Get a reference to the host view of idx directly (equivalent to
// idx.view<idx_type::host_mirror_space>() )
idx_type::t_host h_idx = idx.h_view;
for (int i = 0; i < size; ++i) {
for (view_type::size_type j = 0; j < h_idx.dimension_1 (); ++j) {
h_idx(i,j) = (size + i + (rand () % 500 - 250)) % size;
}
}
// Mark idx as modified on the host_mirror_space so that a
// sync to the device will actually move data. The sync happens in
// the functor's constructor.
idx.modify<idx_type::host_mirror_space> ();
// Run on the device. This will cause a sync of idx to the device,
// since it was marked as modified on the host.
Kokkos::Impl::Timer timer;
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
Kokkos::fence();
double sec1_dev = timer.seconds();
timer.reset();
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
Kokkos::fence();
double sec2_dev = timer.seconds();
// Run on the host's default execution space (could be the same as device).
// This will cause a sync back to the host of dest. Note that if the Device is CUDA,
// the data layout will not be optimal on host, so performance is
// lower than what it would be for a pure host compilation.
timer.reset();
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
Kokkos::fence();
double sec1_host = timer.seconds();
timer.reset();
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
Kokkos::fence();
double sec2_host = timer.seconds();
printf("Device Time with Sync: %f without Sync: %f \n",sec1_dev,sec2_dev);
printf("Host Time with Sync: %f without Sync: %f \n",sec1_host,sec2_host);
Kokkos::finalize();
}