Updating Kokkos lib

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15556 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
stamoor
2016-09-06 23:06:32 +00:00
parent 1ad033ec0c
commit 39be4185c4
502 changed files with 157510 additions and 0 deletions

View File

@ -0,0 +1,16 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
SET(SOURCES "")
SET(LIBRARIES "")
FILE(GLOB SOURCES *.cpp )
TRIBITS_ADD_EXECUTABLE(
md_skeleton
SOURCES ${SOURCES}
COMM serial mpi
DEPLIBS ${LIBRARIES}
)

View File

@ -0,0 +1,53 @@
KOKKOS_PATH ?= ../..
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))
SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)
default: build
echo "Start Build"
# use installed Makefile.kokkos
include $(KOKKOS_PATH)/Makefile.kokkos
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif
DEPFLAGS = -M
LIB =
build: $(EXE)
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
clean:
rm -f *.a *.o *.cuda *.host
# Compilation rules
%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -0,0 +1,3 @@
To build this example on a 2012-model Macbook Pro with NVIDIA Kepler GPU:
./build.cuda_std g++_osx cuda_osx 30 opt

View File

@ -0,0 +1,192 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/* Define values which set the max number of registers used for the Force Kernel
* Its 32 * 2048 / (KOKKOS_CUDA_MAX_THREADS * KOKKOS_CUDA_MIN_BLOCKS)
* Have to be set before including Kokkos header files.
*/
#define KOKKOS_CUDA_MAX_THREADS 512
#define KOKKOS_CUDA_MIN_BLOCKS 3
#include <system.h>
#include <cstdio>
/* Simple Lennard Jones Force Kernel using neighborlists
* Calculates for every pair of atoms (i,j) with distance smaller r_cut
* f_ij = 4*epsilon * ( (sigma/r_ij)^12 - (sigma/r_ij)^6 )
* where r_ij is the distance of atoms (i,j).
* The force on atom i is the sum over f_ij:
* f_i = sum_j (f_ij)
* Neighborlists are used in order to pre calculate which atoms j are
* close enough to i to be able to contribute. By choosing a larger neighbor
* cutoff then the force cutoff, the neighbor list can be reused several times
* (typically 10 - 100).
*/
struct ForceFunctor {
typedef t_x_array::execution_space execution_space; //Device Type for running the kernel
typedef double2 value_type; // When energy calculation is requested return energy, and virial
t_x_array_randomread x; //atom positions
t_f_array f; //atom forces
t_int_1d_const numneigh; //number of neighbors per atom
t_neighbors_const neighbors; //neighborlist
double cutforcesq; //force cutoff
double epsilon; //Potential parameter
double sigma6; //Potential parameter
ForceFunctor(System s) {
x = s.d_x;
f = s.f;
numneigh = s.numneigh;
neighbors = s.neighbors;
cutforcesq = s.force_cutsq;
epsilon = 1.0;
sigma6 = 1.0;
}
/* Operator for not calculating energy and virial */
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
force<0>(i);
}
/* Operator for calculating energy and virial */
KOKKOS_INLINE_FUNCTION
void operator() (const int &i, double2 &energy_virial) const {
double2 ev = force<1>(i);
energy_virial.x += ev.x;
energy_virial.y += ev.y;
}
template<int EVFLAG>
KOKKOS_INLINE_FUNCTION
double2 force(const int &i) const
{
const int numneighs = numneigh[i];
const double xtmp = x(i, 0);
const double ytmp = x(i, 1);
const double ztmp = x(i, 2);
double fix = 0;
double fiy = 0;
double fiz = 0;
double energy = 0;
double virial = 0;
//pragma simd forces vectorization (ignoring the performance objections of the compiler)
//give hint to compiler that fix, fiy and fiz are used for reduction only
#ifdef USE_SIMD
#pragma simd reduction (+: fix,fiy,fiz,energy,virial)
#endif
for(int k = 0; k < numneighs; k++) {
const int j = neighbors(i, k);
const double delx = xtmp - x(j, 0);
const double dely = ytmp - x(j, 1);
const double delz = ztmp - x(j, 2);
const double rsq = delx * delx + dely * dely + delz * delz;
//if(i==0) printf("%i %i %lf %lf\n",i,j,rsq,cutforcesq);
if(rsq < cutforcesq) {
const double sr2 = 1.0 / rsq;
const double sr6 = sr2 * sr2 * sr2 * sigma6;
const double force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
if(EVFLAG) {
energy += sr6 * (sr6 - 1.0) * epsilon;
virial += delx * delx * force + dely * dely * force + delz * delz * force;
}
}
}
f(i, 0) += fix;
f(i, 1) += fiy;
f(i, 2) += fiz;
double2 energy_virial ;
energy_virial.x = 4.0 * energy ;
energy_virial.y = 0.5 * virial ;
return energy_virial;
}
/* init and join functions when doing the reduction to obtain energy and virial */
KOKKOS_FUNCTION
static void init(volatile value_type &update) {
update.x = update.y = 0;
}
KOKKOS_FUNCTION
static void join(volatile value_type &update ,
const volatile value_type &source) {
update.x += source.x ;
update.y += source.y ;
}
};
/* Calling function */
double2 force(System &s,int evflag) {
ForceFunctor f(s);
double2 ev ; ev.x = 0 ; ev.y = 0 ;
if(!evflag)
Kokkos::parallel_for(s.nlocal,f);
else
Kokkos::parallel_reduce(s.nlocal,f,ev);
execution_space::fence();
return ev;
}

View File

@ -0,0 +1,205 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include "system.h"
int create_system(System &system, int nx, int ny, int nz, double rho);
int neigh_setup(System &system);
int neigh_build(System &system);
double2 force(System &system,int evflag);
/* simple MD Skeleton which
* - constructs a simple FCC lattice,
* - computes a neighborlist
* - compute LJ-Force kernel a number of times
*/
int main(int argc, char** argv) {
printf("Running MD Skeleton\n");
/* Thread numbers for Host */
int num_threads = 1;
int teams = 1;
int device = 0; // Default device for GPU runs
/* avoid unused variable warnings */
(void)num_threads;
(void)teams;
(void)device;
/* Default value for number of force calculations */
int iter = 100;
/* Default value for system size (4*nx*ny*nz atoms)
* nx, ny and nz are set to system_size if not specified on commandline */
int system_size = 20;
int nx = -1;
int ny = -1;
int nz = -1;
int neighbor_size = 1; // Default bin size for neighbor list construction
double rho = 0.8442; // Number density of the system
double delta = 0; // Scaling factor for random offsets of atom positions
/* read in command-line arguments */
for(int i = 0; i < argc; i++) {
if((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--num_threads") == 0)) {
num_threads = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "--teams") == 0)) {
teams = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-d") == 0) || (strcmp(argv[i], "--device") == 0)) {
device = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "--delta") == 0)) {
delta = atof(argv[++i]);
continue;
}
if((strcmp(argv[i], "-i") == 0) || (strcmp(argv[i], "--iter") == 0)) {
iter = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-rho") == 0)) {
rho = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--size") == 0)) {
system_size = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-nx") == 0)) {
nx = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-ny") == 0)) {
ny = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-nz") == 0)) {
nz = atoi(argv[++i]);
continue;
}
if((strcmp(argv[i], "-b") == 0) || (strcmp(argv[i], "--neigh_bins") == 0)) {
neighbor_size = atoi(argv[++i]);
continue;
}
}
if( nx < 0 ) nx = system_size;
if( ny < 0 ) ny = system_size;
if( nz < 0 ) nz = system_size;
printf("-> Init Device\n");
#if defined( KOKKOS_HAVE_CUDA )
Kokkos::HostSpace::execution_space::initialize(teams*num_threads);
Kokkos::Cuda::SelectDevice select_device(device);
Kokkos::Cuda::initialize(select_device);
#elif defined( KOKKOS_HAVE_OPENMP )
Kokkos::OpenMP::initialize(teams*num_threads);
#elif defined( KOKKOS_HAVE_PTHREAD )
Kokkos::Threads::initialize(teams*num_threads);
#endif
System system;
system.neigh_cut = 2.8;
system.force_cut = 2.5;
system.force_cutsq = system.force_cut*system.force_cut;
system.delta = delta;
printf("-> Build system\n");
create_system(system,nx,ny,nz,rho);
printf("-> Created %i atoms and %i ghost atoms\n",system.nlocal,system.nghost);
system.nbinx = system.box.xprd/neighbor_size+1;
system.nbiny = system.box.yprd/neighbor_size+1;
system.nbinz = system.box.zprd/neighbor_size+1;
printf("-> Building Neighborlist\n");
neigh_setup(system);
neigh_build(system);
double2 ev = force(system,1);
printf("-> Calculate Energy: %f Virial: %f\n",ev.x,ev.y);
printf("-> Running %i force calculations\n",iter);
Kokkos::Timer timer;
for(int i=0;i<iter;i++) {
force(system,0);
}
double time = timer.seconds();
printf("Time: %e s for %i iterations with %i atoms\n",time,iter,system.nlocal);
execution_space::finalize();
}

View File

@ -0,0 +1,430 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <system.h>
#include <cstdio>
#include <Kokkos_Core.hpp>
#define SMALL 1.0e-6
#define FACTOR 0.999
/* BinningFunctor puts atoms into bins of the simulation box
* Neighborlists are then created by checking only distances of atoms
* in adjacent bins. That makes neighborlist construction a O(N) operation.
*/
struct BinningFunctor {
typedef t_int_2d::execution_space execution_space;
System s;
int atoms_per_bin;
BinningFunctor(System _s): s(_s) {
atoms_per_bin = s.bins.dimension_1();
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const
{
const int ibin = coord2bin(s.d_x(i, 0), s.d_x(i, 1), s.d_x(i, 2));
const int ac = Kokkos::atomic_fetch_add(&s.bincount[ibin], 1);
if(ac < atoms_per_bin) {
s.bins(ibin, ac) = i;
} else if(s.d_resize(0) < ac) {
s.d_resize(0) = ac;
}
}
KOKKOS_INLINE_FUNCTION
int coord2bin(double x, double y, double z) const
{
int ix, iy, iz;
if(x >= s.box.xprd)
ix = (int)((x - s.box.xprd) * s.bininvx) + s.nbinx - s.mbinxlo;
else if(x >= 0.0)
ix = (int)(x * s.bininvx) - s.mbinxlo;
else
ix = (int)(x * s.bininvx) - s.mbinxlo - 1;
if(y >= s.box.yprd)
iy = (int)((y - s.box.yprd) * s.bininvy) + s.nbiny - s.mbinylo;
else if(y >= 0.0)
iy = (int)(y * s.bininvy) - s.mbinylo;
else
iy = (int)(y * s.bininvy) - s.mbinylo - 1;
if(z >= s.box.zprd)
iz = (int)((z - s.box.zprd) * s.bininvz) + s.nbinz - s.mbinzlo;
else if(z >= 0.0)
iz = (int)(z * s.bininvz) - s.mbinzlo;
else
iz = (int)(z * s.bininvz) - s.mbinzlo - 1;
return (iz * s.mbiny * s.mbinx + iy * s.mbinx + ix + 1);
}
};
/* Build the actual neighborlist*/
struct BuildFunctor {
typedef t_int_2d::execution_space execution_space;
System s;
int maxneighs;
BuildFunctor(System _s): s(_s) {
maxneighs = s.neighbors.dimension_1();
}
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const
{
int n = 0;
const t_int_1d_const_um bincount_c = s.bincount;
const double xtmp = s.d_x(i, 0);
const double ytmp = s.d_x(i, 1);
const double ztmp = s.d_x(i, 2);
const int ibin = coord2bin(xtmp, ytmp, ztmp);
// loop over all bins in neighborhood (includes ibin)
for(int k = 0; k < s.nstencil; k++) {
const int jbin = ibin + s.d_stencil[k];
// get subview of jbin
const t_int_1d_const_um loc_bin =
Kokkos::subview(s.bins,jbin,Kokkos::ALL());
if(ibin == jbin)
for(int m = 0; m < bincount_c[jbin]; m++) {
const int j = loc_bin[m];
//for same bin as atom i skip j if i==j
if (j == i) continue;
const double delx = xtmp - s.d_x(j, 0);
const double dely = ytmp - s.d_x(j, 1);
const double delz = ztmp - s.d_x(j, 2);
const double rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= s.neigh_cutsq && n<maxneighs) s.neighbors(i,n++) = j;
}
else {
for(int m = 0; m < bincount_c[jbin]; m++) {
const int j = loc_bin[m];
const double delx = xtmp - s.d_x(j, 0);
const double dely = ytmp - s.d_x(j, 1);
const double delz = ztmp - s.d_x(j, 2);
const double rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= s.neigh_cutsq && n<maxneighs) s.neighbors(i,n++) = j;
}
}
}
s.numneigh[i] = n;
if(n >= maxneighs) {
if(n >= s.d_resize(0)) s.d_resize(0) = n;
}
}
KOKKOS_INLINE_FUNCTION
int coord2bin(double x, double y, double z) const
{
int ix, iy, iz;
if(x >= s.box.xprd)
ix = (int)((x - s.box.xprd) * s.bininvx) + s.nbinx - s.mbinxlo;
else if(x >= 0.0)
ix = (int)(x * s.bininvx) - s.mbinxlo;
else
ix = (int)(x * s.bininvx) - s.mbinxlo - 1;
if(y >= s.box.yprd)
iy = (int)((y - s.box.yprd) * s.bininvy) + s.nbiny - s.mbinylo;
else if(y >= 0.0)
iy = (int)(y * s.bininvy) - s.mbinylo;
else
iy = (int)(y * s.bininvy) - s.mbinylo - 1;
if(z >= s.box.zprd)
iz = (int)((z - s.box.zprd) * s.bininvz) + s.nbinz - s.mbinzlo;
else if(z >= 0.0)
iz = (int)(z * s.bininvz) - s.mbinzlo;
else
iz = (int)(z * s.bininvz) - s.mbinzlo - 1;
return (iz * s.mbiny * s.mbinx + iy * s.mbinx + ix + 1);
}
};
/* Reset an array to zero */
struct MemsetZeroFunctor {
typedef t_x_array::execution_space execution_space ;
void* ptr;
KOKKOS_INLINE_FUNCTION void operator()(const int i) const {
((int*)ptr)[i] = 0;
}
};
/* Calculate distance of two bins */
double bindist(System &s, int i, int j, int k)
{
double delx, dely, delz;
if(i > 0)
delx = (i - 1) * s.binsizex;
else if(i == 0)
delx = 0.0;
else
delx = (i + 1) * s.binsizex;
if(j > 0)
dely = (j - 1) * s.binsizey;
else if(j == 0)
dely = 0.0;
else
dely = (j + 1) * s.binsizey;
if(k > 0)
delz = (k - 1) * s.binsizez;
else if(k == 0)
delz = 0.0;
else
delz = (k + 1) * s.binsizez;
return (delx * delx + dely * dely + delz * delz);
}
/* Setup the neighborlist construction
* Determine binsizes, a stencil for defining adjacency, etc.
*/
void neigh_setup(System &s) {
s.neigh_cutsq = s.neigh_cut * s.neigh_cut;
/*
c bins must evenly divide into box size,
c becoming larger than cutneigh if necessary
c binsize = 1/2 of cutoff is near optimal
if (flag == 0) {
nbinx = 2.0 * xprd / cutneigh;
nbiny = 2.0 * yprd / cutneigh;
nbinz = 2.0 * zprd / cutneigh;
if (nbinx == 0) nbinx = 1;
if (nbiny == 0) nbiny = 1;
if (nbinz == 0) nbinz = 1;
}
*/
s.binsizex = s.box.xprd / s.nbinx;
s.binsizey = s.box.yprd / s.nbiny;
s.binsizez = s.box.zprd / s.nbinz;
s.bininvx = 1.0 / s.binsizex;
s.bininvy = 1.0 / s.binsizey;
s.bininvz = 1.0 / s.binsizez;
double coord = s.box.xlo - s.neigh_cut - SMALL * s.box.xprd;
s.mbinxlo = static_cast<int>(coord * s.bininvx);
if(coord < 0.0) s.mbinxlo = s.mbinxlo - 1;
coord = s.box.xhi + s.neigh_cut + SMALL * s.box.xprd;
int mbinxhi = static_cast<int>(coord * s.bininvx);
coord = s.box.ylo - s.neigh_cut - SMALL * s.box.yprd;
s.mbinylo = static_cast<int>(coord * s.bininvy);
if(coord < 0.0) s.mbinylo = s.mbinylo - 1;
coord = s.box.yhi + s.neigh_cut + SMALL * s.box.yprd;
int mbinyhi = static_cast<int>(coord * s.bininvy);
coord = s.box.zlo - s.neigh_cut - SMALL * s.box.zprd;
s.mbinzlo = static_cast<int>(coord * s.bininvz);
if(coord < 0.0) s.mbinzlo = s.mbinzlo - 1;
coord = s.box.zhi + s.neigh_cut + SMALL * s.box.zprd;
int mbinzhi = static_cast<int>(coord * s.bininvz);
/* extend bins by 1 in each direction to insure stencil coverage */
s.mbinxlo = s.mbinxlo - 1;
mbinxhi = mbinxhi + 1;
s.mbinx = mbinxhi - s.mbinxlo + 1;
s.mbinylo = s.mbinylo - 1;
mbinyhi = mbinyhi + 1;
s.mbiny = mbinyhi - s.mbinylo + 1;
s.mbinzlo = s.mbinzlo - 1;
mbinzhi = mbinzhi + 1;
s.mbinz = mbinzhi - s.mbinzlo + 1;
/*
compute bin stencil of all bins whose closest corner to central bin
is within neighbor cutoff
for partial Newton (newton = 0),
stencil is all surrounding bins including self
for full Newton (newton = 1),
stencil is bins to the "upper right" of central bin, does NOT include self
next(xyz) = how far the stencil could possibly extend
factor < 1.0 for special case of LJ benchmark so code will create
correct-size stencil when there are 3 bins for every 5 lattice spacings
*/
int nextx = static_cast<int>(s.neigh_cut * s.bininvx);
if(nextx * s.binsizex < FACTOR * s.neigh_cut) nextx++;
int nexty = static_cast<int>(s.neigh_cut * s.bininvy);
if(nexty * s.binsizey < FACTOR * s.neigh_cut) nexty++;
int nextz = static_cast<int>(s.neigh_cut * s.bininvz);
if(nextz * s.binsizez < FACTOR * s.neigh_cut) nextz++;
int nmax = (2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1);
s.d_stencil = t_int_1d("stencil", nmax);
s.h_stencil = Kokkos::create_mirror_view(s.d_stencil);
s.nstencil = 0;
int kstart = -nextz;
for(int k = kstart; k <= nextz; k++) {
for(int j = -nexty; j <= nexty; j++) {
for(int i = -nextx; i <= nextx; i++) {
if(bindist(s,i, j, k) < s.neigh_cutsq) {
s.h_stencil(s.nstencil++) = k * s.mbiny * s.mbinx + j * s.mbinx + i;
}
}
}
}
/* Allocate neighbor arrays */
Kokkos::deep_copy(s.d_stencil, s.h_stencil);
s.mbins = s.mbinx * s.mbiny * s.mbinz;
s.bincount = t_int_1d("bincount", s.mbins);
s.bins = t_int_2d("bins", s.mbins, 8);
s.neighbors = t_neighbors("neighbors",s.natoms,80);
s.numneigh = t_int_1d("numneigh",s.natoms);
s.d_resize = t_int_scalar("resize");
s.h_resize = Kokkos::create_mirror_view(s.d_resize);
}
/* Build the neighborlist
* This is a try and rerun algorithm for handling the case where the bins array
* and the neighbors array are not big enough. So if one is too small, it will
* reallocate and rerun the binnind algorithm or the neighborlist construction.
*/
void neigh_build(System &s) {
/* Binning of atoms */
s.h_resize(0) = 1;
while(s.h_resize(0) > 0) {
s.h_resize(0) = 0;
Kokkos::deep_copy(s.d_resize, s.h_resize);
MemsetZeroFunctor f_zero;
f_zero.ptr = (void*) s.bincount.ptr_on_device();
Kokkos::parallel_for(s.mbins, f_zero);
execution_space::fence();
BinningFunctor f(s);
Kokkos::parallel_for(s.natoms, f);
execution_space::fence();
/* Check if bins was large enough, if nor reallocated and rerun */
deep_copy(s.h_resize, s.d_resize);
if(s.h_resize(0)) {
int atoms_per_bin = s.h_resize(0)+2;
s.bins = t_int_2d("bins", s.mbins, atoms_per_bin);
}
}
/* Neighborlist construction */
s.h_resize(0) = 1;
while(s.h_resize(0)) {
s.h_resize(0) = 0;
Kokkos::deep_copy(s.d_resize, s.h_resize);
BuildFunctor f(s);
Kokkos::parallel_for(s.nlocal, f);
execution_space::fence();
/* Check if neighbors was large enough, if nor reallocated and rerun */
deep_copy(s.h_resize, s.d_resize);
if(s.h_resize(0)) {
int maxneighs = s.h_resize(0) * 1.2;
s.neighbors = t_neighbors("neighbors", s.natoms, maxneighs);
}
}
}

View File

@ -0,0 +1,271 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <system.h>
#include <cmath>
#include <cstdio>
#include <cstdlib>
/* initialize atoms on fcc lattice in parallel fashion */
#define MAX(a,b) (a>b?a:b)
#define MIN(a,b) (a<b?a:b)
int create_system(System &system, int nx, int ny, int nz, double rho)
{
/* Box Setup */
double lattice = pow((4.0 / rho), (1.0 / 3.0));
system.box.xprd = nx * lattice;
system.box.yprd = ny * lattice;
system.box.zprd = nz * lattice;
system.box.xlo = 0;
system.box.ylo = 0;
system.box.zlo = 0;
system.box.xhi = system.box.xprd;
system.box.yhi = system.box.yprd;
system.box.zhi = system.box.zprd;
int ghost_dist = int(system.neigh_cut/lattice) + 1;
/* total # of atoms */
system.nlocal = 4 * nx * ny * nz;
system.nghost = 4 * (nx + 2 * ghost_dist) *
(ny + 2 * ghost_dist) *
(nz + 2 * ghost_dist) -
system.nlocal;
system.natoms = system.nlocal + system.nghost;
system.d_x = t_x_array("X",system.natoms);
system.h_x = Kokkos::create_mirror_view(system.d_x);
system.f = t_f_array("F",system.natoms);
/* determine loop bounds of lattice subsection that overlaps my sub-box
insure loop bounds do not exceed nx,ny,nz */
double alat = pow((4.0 / rho), (1.0 / 3.0));
int ilo = static_cast<int>(system.box.xlo / (0.5 * alat) - 1);
int ihi = static_cast<int>(system.box.xhi / (0.5 * alat) + 1);
int jlo = static_cast<int>(system.box.ylo / (0.5 * alat) - 1);
int jhi = static_cast<int>(system.box.yhi / (0.5 * alat) + 1);
int klo = static_cast<int>(system.box.zlo / (0.5 * alat) - 1);
int khi = static_cast<int>(system.box.zhi / (0.5 * alat) + 1);
ilo = MAX(ilo, 0);
ihi = MIN(ihi, 2 * nx - 1);
jlo = MAX(jlo, 0);
jhi = MIN(jhi, 2 * ny - 1);
klo = MAX(klo, 0);
khi = MIN(khi, 2 * nz - 1);
/* generates positions of atoms on fcc sublattice*/
srand(3718273);
/* create non-ghost atoms */
{
double xtmp, ytmp, ztmp;
int sx = 0;
int sy = 0;
int sz = 0;
int ox = 0;
int oy = 0;
int oz = 0;
int subboxdim = 8;
int n = 0;
int iflag = 0;
while(oz * subboxdim <= khi) {
const int k = oz * subboxdim + sz;
const int j = oy * subboxdim + sy;
const int i = ox * subboxdim + sx;
if(iflag) continue;
if(((i + j + k) % 2 == 0) &&
(i >= ilo) && (i <= ihi) &&
(j >= jlo) && (j <= jhi) &&
(k >= klo) && (k <= khi)) {
const int nold = n;
while(nold == n) {
xtmp = 0.5 * alat * i + system.delta/1000*(rand()%1000-500);
ytmp = 0.5 * alat * j + system.delta/1000*(rand()%1000-500);
ztmp = 0.5 * alat * k + system.delta/1000*(rand()%1000-500);
if(xtmp >= system.box.xlo && xtmp < system.box.xhi &&
ytmp >= system.box.ylo && ytmp < system.box.yhi &&
ztmp >= system.box.zlo && ztmp < system.box.zhi) {
system.h_x(n,0) = xtmp;
system.h_x(n,1) = ytmp;
system.h_x(n,2) = ztmp;
n++;
}
}
}
sx++;
if(sx == subboxdim) {
sx = 0;
sy++;
}
if(sy == subboxdim) {
sy = 0;
sz++;
}
if(sz == subboxdim) {
sz = 0;
ox++;
}
if(ox * subboxdim > ihi) {
ox = 0;
oy++;
}
if(oy * subboxdim > jhi) {
oy = 0;
oz++;
}
}
/* check that correct # of atoms were created */
if(system.nlocal != n) {
printf("Created incorrect # of atoms\n");
return 1;
}
}
/* create ghost atoms */
{
double xtmp, ytmp, ztmp;
int ilo_g = ilo - 2 * ghost_dist;
int jlo_g = jlo - 2 * ghost_dist;
int klo_g = klo - 2 * ghost_dist;
int ihi_g = ihi + 2 * ghost_dist;
int jhi_g = jhi + 2 * ghost_dist;
int khi_g = khi + 2 * ghost_dist;
int subboxdim = 8;
int sx = 0;
int sy = 0;
int sz = 0;
int ox = subboxdim * ilo_g;
int oy = subboxdim * jlo_g;
int oz = subboxdim * klo_g;
int n = system.nlocal;
int iflag = 0;
while(oz * subboxdim <= khi_g) {
const int k = oz * subboxdim + sz;
const int j = oy * subboxdim + sy;
const int i = ox * subboxdim + sx;
if(iflag) continue;
if(((i + j + k) % 2 == 0) &&
(i >= ilo_g) && (i <= ihi_g) &&
(j >= jlo_g) && (j <= jhi_g) &&
(k >= klo_g) && (k <= khi_g) &&
((i < ilo) || (i > ihi) ||
(j < jlo) || (j > jhi) ||
(k < klo) || (k > khi))
) {
xtmp = 0.5 * alat * i;
ytmp = 0.5 * alat * j;
ztmp = 0.5 * alat * k;
system.h_x(n,0) = xtmp + system.delta/1000*(rand()%1000-500);;
system.h_x(n,1) = ytmp + system.delta/1000*(rand()%1000-500);;
system.h_x(n,2) = ztmp + system.delta/1000*(rand()%1000-500);;
n++;
}
sx++;
if(sx == subboxdim) {
sx = 0;
sy++;
}
if(sy == subboxdim) {
sy = 0;
sz++;
}
if(sz == subboxdim) {
sz = 0;
ox++;
//printf("%i %i %i // %i %i %i\n",ox,oy,oz,i,j,k);
}
if(ox * subboxdim > ihi_g) {
ox = subboxdim * ilo_g;
oy++;
}
if(oy * subboxdim > jhi_g) {
oy = subboxdim * jlo_g;
oz++;
}
}
}
Kokkos::deep_copy(system.d_x,system.h_x);
return 0;
}

View File

@ -0,0 +1,92 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef SYSTEM_H_
#define SYSTEM_H_
#include <types.h>
struct Box {
double xprd, yprd, zprd;
double xlo, xhi;
double ylo, yhi;
double zlo, zhi;
};
struct System {
Box box;
int natoms;
int nlocal;
int nghost;
t_x_array d_x;
t_x_array_host h_x;
t_f_array f;
t_neighbors neighbors;
t_int_1d numneigh;
double delta;
double neigh_cut,neigh_cutsq;
int mbins;
int nbinx,nbiny,nbinz;
int mbinx,mbiny,mbinz;
int mbinxlo,mbinylo,mbinzlo;
double binsizex,binsizey,binsizez;
double bininvx,bininvy,bininvz;
t_int_1d bincount;
t_int_2d bins;
t_int_scalar d_resize;
t_int_scalar_host h_resize;
t_int_1d d_stencil;
t_int_1d_host h_stencil;
int nstencil;
double force_cut,force_cutsq;
};
#endif

View File

@ -0,0 +1,118 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef TYPES_H_
#define TYPES_H_
/* Determine default device type and necessary includes */
#include <Kokkos_Core.hpp>
typedef Kokkos::DefaultExecutionSpace execution_space ;
#if ! defined( KOKKOS_HAVE_CUDA )
struct double2 {
double x, y;
KOKKOS_INLINE_FUNCTION
double2(double xinit, double yinit) {
x = xinit;
y = yinit;
}
KOKKOS_INLINE_FUNCTION
double2() {
x = 0.0;
y = 0.0;
}
KOKKOS_INLINE_FUNCTION
double2& operator += (const double2& src) {
x+=src.x;
y+=src.y;
return *this;
}
KOKKOS_INLINE_FUNCTION
volatile double2& operator += (const volatile double2& src) volatile {
x+=src.x;
y+=src.y;
return *this;
}
};
#endif
#include <impl/Kokkos_Timer.hpp>
/* Define types used throughout the code */
//Position arrays
typedef Kokkos::View<double*[3], Kokkos::LayoutRight, execution_space> t_x_array ;
typedef t_x_array::HostMirror t_x_array_host ;
typedef Kokkos::View<const double*[3], Kokkos::LayoutRight, execution_space> t_x_array_const ;
typedef Kokkos::View<const double*[3], Kokkos::LayoutRight, execution_space, Kokkos::MemoryRandomAccess > t_x_array_randomread ;
//Force array
typedef Kokkos::View<double*[3], execution_space> t_f_array ;
//Neighborlist
typedef Kokkos::View<int**, execution_space > t_neighbors ;
typedef Kokkos::View<const int**, execution_space > t_neighbors_const ;
typedef Kokkos::View<int*, execution_space, Kokkos::MemoryUnmanaged > t_neighbors_sub ;
typedef Kokkos::View<const int*, execution_space, Kokkos::MemoryUnmanaged > t_neighbors_const_sub ;
//1d int array
typedef Kokkos::View<int*, execution_space > t_int_1d ;
typedef t_int_1d::HostMirror t_int_1d_host ;
typedef Kokkos::View<const int*, execution_space > t_int_1d_const ;
typedef Kokkos::View<int*, execution_space , Kokkos::MemoryUnmanaged> t_int_1d_um ;
typedef Kokkos::View<const int* , execution_space , Kokkos::MemoryUnmanaged> t_int_1d_const_um ;
//2d int array
typedef Kokkos::View<int**, Kokkos::LayoutRight, execution_space > t_int_2d ;
typedef t_int_2d::HostMirror t_int_2d_host ;
//Scalar ints
typedef Kokkos::View<int[1], Kokkos::LayoutLeft, execution_space> t_int_scalar ;
typedef t_int_scalar::HostMirror t_int_scalar_host ;
#endif /* TYPES_H_ */