Compare commits
64 Commits
patch_6Jan
...
patch_17Ja
| Author | SHA1 | Date | |
|---|---|---|---|
| d2fc88a626 | |||
| c52a26382f | |||
| ad4d299975 | |||
| 83408b195f | |||
| cd7bdf9251 | |||
| 8c5b108900 | |||
| c19d2011bb | |||
| 973bef4d45 | |||
| 1b9e50c8cb | |||
| 252e07e083 | |||
| 74a661ae26 | |||
| d8bc590aaf | |||
| c9bea60710 | |||
| 5cd856c97f | |||
| 2f13365cf5 | |||
| 0a2b78acb8 | |||
| 3f46b6d782 | |||
| 5abd6e5122 | |||
| f3a82f454e | |||
| 473a3ebeef | |||
| b220850377 | |||
| fa00e0593f | |||
| 4a09399dc6 | |||
| 5821fe8dd5 | |||
| 98ceb6feb1 | |||
| 61cff85435 | |||
| aa0b327f7e | |||
| 04fe071968 | |||
| 78498715b4 | |||
| b2f67fea30 | |||
| c59bcf31d1 | |||
| 2540fc281c | |||
| e8e03dd440 | |||
| daf766d4f8 | |||
| 630783c8e8 | |||
| c94030d966 | |||
| 1229f6f60b | |||
| 0b081b0086 | |||
| 8e1cf6643c | |||
| 6950a99162 | |||
| 9f4e5e0661 | |||
| 34cb4027df | |||
| 1d0e600ab7 | |||
| 7162cafdf5 | |||
| ee9e7cfbd5 | |||
| 7839c335da | |||
| 622d926849 | |||
| 92d15d4a89 | |||
| 95706ac846 | |||
| d06688bb91 | |||
| d014e00e53 | |||
| 0db2a07993 | |||
| 33412c76ed | |||
| e5ac49d1de | |||
| 1a81da0f73 | |||
| c31f1e9f22 | |||
| ebd25cc078 | |||
| 9250a55923 | |||
| a9f0b7d523 | |||
| 20f8a8c219 | |||
| 09af780aa8 | |||
| 51e52b477a | |||
| 20a4e365b7 | |||
| ccd09e3967 |
Binary file not shown.
|
Before Width: | Height: | Size: 57 KiB After Width: | Height: | Size: 25 KiB |
BIN
doc/src/JPG/tutorial_reverse_pull_request7.png
Normal file
BIN
doc/src/JPG/tutorial_reverse_pull_request7.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 25 KiB |
@ -1,7 +1,7 @@
|
||||
<!-- HTML_ONLY -->
|
||||
<HEAD>
|
||||
<TITLE>LAMMPS Users Manual</TITLE>
|
||||
<META NAME="docnumber" CONTENT="6 Jan 2017 version">
|
||||
<META NAME="docnumber" CONTENT="17 Jan 2017 version">
|
||||
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
|
||||
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
|
||||
</HEAD>
|
||||
@ -21,7 +21,7 @@
|
||||
<H1></H1>
|
||||
|
||||
LAMMPS Documentation :c,h3
|
||||
6 Jan 2017 version :c,h4
|
||||
17 Jan 2017 version :c,h4
|
||||
|
||||
Version info: :h4
|
||||
|
||||
|
||||
@ -583,6 +583,7 @@ USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT.
|
||||
"lineforce"_fix_lineforce.html,
|
||||
"momentum (k)"_fix_momentum.html,
|
||||
"move"_fix_move.html,
|
||||
"mscg"_fix_mscg.html,
|
||||
"msst"_fix_msst.html,
|
||||
"neb"_fix_neb.html,
|
||||
"nph (ko)"_fix_nh.html,
|
||||
@ -918,7 +919,7 @@ KOKKOS, o = USER-OMP, t = OPT.
|
||||
"dpd (go)"_pair_dpd.html,
|
||||
"dpd/tstat (go)"_pair_dpd.html,
|
||||
"dsmc"_pair_dsmc.html,
|
||||
"eam (gkot)"_pair_eam.html,
|
||||
"eam (gkiot)"_pair_eam.html,
|
||||
"eam/alloy (gkot)"_pair_eam.html,
|
||||
"eam/fs (gkot)"_pair_eam.html,
|
||||
"eim (o)"_pair_eim.html,
|
||||
|
||||
@ -55,12 +55,13 @@ LAMMPS errors are detected at setup time; others like a bond
|
||||
stretching too far may not occur until the middle of a run.
|
||||
|
||||
LAMMPS tries to flag errors and print informative error messages so
|
||||
you can fix the problem. Of course, LAMMPS cannot figure out your
|
||||
physics or numerical mistakes, like choosing too big a timestep,
|
||||
specifying erroneous force field coefficients, or putting 2 atoms on
|
||||
top of each other! If you run into errors that LAMMPS doesn't catch
|
||||
that you think it should flag, please send an email to the
|
||||
"developers"_http://lammps.sandia.gov/authors.html.
|
||||
you can fix the problem. For most errors it will also print the last
|
||||
input script command that it was processing. Of course, LAMMPS cannot
|
||||
figure out your physics or numerical mistakes, like choosing too big a
|
||||
timestep, specifying erroneous force field coefficients, or putting 2
|
||||
atoms on top of each other! If you run into errors that LAMMPS
|
||||
doesn't catch that you think it should flag, please send an email to
|
||||
the "developers"_http://lammps.sandia.gov/authors.html.
|
||||
|
||||
If you get an error message about an invalid command in your input
|
||||
script, you can determine what command is causing the problem by
|
||||
|
||||
@ -1153,7 +1153,7 @@ Package, Description, Author(s), Doc page, Example, Pic/movie, Library
|
||||
"USER-MISC"_#USER-MISC, single-file contributions, USER-MISC/README, USER-MISC/README, -, -, -
|
||||
"USER-MANIFOLD"_#USER-MANIFOLD, motion on 2d surface, Stefan Paquay (Eindhoven U of Technology), "fix manifoldforce"_fix_manifoldforce.html, USER/manifold, "manifold"_manifold, -
|
||||
"USER-MOLFILE"_#USER-MOLFILE, "VMD"_VMD molfile plug-ins, Axel Kohlmeyer (Temple U), "dump molfile"_dump_molfile.html, -, -, VMD-MOLFILE
|
||||
"USER-NC-DUMP"_#USER-NC-DUMP, dump output via NetCDF, Lars Pastewka (Karlsruhe Institute of Technology, KIT), "dump nc, dump nc/mpiio"_dump_nc.html, -, -, lib/netcdf
|
||||
"USER-NC-DUMP"_#USER-NC-DUMP, dump output via NetCDF, Lars Pastewka (Karlsruhe Institute of Technology, KIT), "dump nc / dump nc/mpiio"_dump_nc.html, -, -, lib/netcdf
|
||||
"USER-OMP"_#USER-OMP, OpenMP threaded styles, Axel Kohlmeyer (Temple U), "Section 5.3.4"_accelerate_omp.html, -, -, -
|
||||
"USER-PHONON"_#USER-PHONON, phonon dynamical matrix, Ling-Ti Kong (Shanghai Jiao Tong U), "fix phonon"_fix_phonon.html, USER/phonon, -, -
|
||||
"USER-QMMM"_#USER-QMMM, QM/MM coupling, Axel Kohlmeyer (Temple U), "fix qmmm"_fix_qmmm.html, USER/qmmm, -, lib/qmmm
|
||||
@ -1610,11 +1610,12 @@ and a "dump nc/mpiio"_dump_nc.html command to output LAMMPS snapshots
|
||||
in this format. See src/USER-NC-DUMP/README for more details.
|
||||
|
||||
NetCDF files can be directly visualized with the following tools:
|
||||
|
||||
Ovito (http://www.ovito.org/). Ovito supports the AMBER convention
|
||||
and all of the above extensions. :ulb,l
|
||||
and all of the above extensions. :ulb,l
|
||||
VMD (http://www.ks.uiuc.edu/Research/vmd/) :l
|
||||
AtomEye (http://www.libatoms.org/). The libAtoms version of AtomEye contains
|
||||
a NetCDF reader that is not present in the standard distribution of AtomEye :l,ule
|
||||
a NetCDF reader that is not present in the standard distribution of AtomEye :l,ule
|
||||
|
||||
The person who created these files is Lars Pastewka at
|
||||
Karlsruhe Institute of Technology (lars.pastewka at kit.edu).
|
||||
|
||||
@ -1727,7 +1727,7 @@ thermodynamic state and a total run time for the simulation. It then
|
||||
appends statistics about the CPU time and storage requirements for the
|
||||
simulation. An example set of statistics is shown here:
|
||||
|
||||
Loop time of 2.81192 on 4 procs for 300 steps with 2004 atoms
|
||||
Loop time of 2.81192 on 4 procs for 300 steps with 2004 atoms :pre
|
||||
|
||||
Performance: 18.436 ns/day 1.302 hours/ns 106.689 timesteps/s
|
||||
97.0% CPU use with 4 MPI tasks x no OpenMP threads :pre
|
||||
@ -1757,14 +1757,14 @@ Ave special neighs/atom = 2.34032
|
||||
Neighbor list builds = 26
|
||||
Dangerous builds = 0 :pre
|
||||
|
||||
The first section provides a global loop timing summary. The loop time
|
||||
The first section provides a global loop timing summary. The {loop time}
|
||||
is the total wall time for the section. The {Performance} line is
|
||||
provided for convenience to help predict the number of loop
|
||||
continuations required and for comparing performance with other
|
||||
similar MD codes. The CPU use line provides the CPU utilzation per
|
||||
continuations required and for comparing performance with other,
|
||||
similar MD codes. The {CPU use} line provides the CPU utilization per
|
||||
MPI task; it should be close to 100% times the number of OpenMP
|
||||
threads (or 1). Lower numbers correspond to delays due to file I/O or
|
||||
insufficient thread utilization.
|
||||
threads (or 1 if no OpenMP). Lower numbers correspond to delays due
|
||||
to file I/O or insufficient thread utilization.
|
||||
|
||||
The MPI task section gives the breakdown of the CPU run time (in
|
||||
seconds) into major categories:
|
||||
@ -1791,7 +1791,7 @@ is present that also prints the CPU utilization in percent. In
|
||||
addition, when using {timer full} and the "package omp"_package.html
|
||||
command are active, a similar timing summary of time spent in threaded
|
||||
regions to monitor thread utilization and load balance is provided. A
|
||||
new entry is the {Reduce} section, which lists the time spend in
|
||||
new entry is the {Reduce} section, which lists the time spent in
|
||||
reducing the per-thread data elements to the storage for non-threaded
|
||||
computation. These thread timings are taken from the first MPI rank
|
||||
only and thus, as the breakdown for MPI tasks can change from MPI
|
||||
|
||||
@ -29,7 +29,7 @@ Bond Styles: fene, harmonic :l
|
||||
Dihedral Styles: charmm, harmonic, opls :l
|
||||
Fixes: nve, npt, nvt, nvt/sllod :l
|
||||
Improper Styles: cvff, harmonic :l
|
||||
Pair Styles: buck/coul/cut, buck/coul/long, buck, gayberne,
|
||||
Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne,
|
||||
charmm/coul/long, lj/cut, lj/cut/coul/long, sw, tersoff :l
|
||||
K-Space Styles: pppm :l
|
||||
:ule
|
||||
|
||||
@ -110,14 +110,14 @@ mpirun -np 96 -ppn 12 lmp_g++ -k on t 20 -sf kk -in in.lj # ditto on 8 Phis :p
|
||||
[Required hardware/software:]
|
||||
|
||||
Kokkos support within LAMMPS must be built with a C++11 compatible
|
||||
compiler. If using gcc, version 4.8.1 or later is required.
|
||||
compiler. If using gcc, version 4.7.2 or later is required.
|
||||
|
||||
To build with Kokkos support for CPUs, your compiler must support the
|
||||
OpenMP interface. You should have one or more multi-core CPUs so that
|
||||
multiple threads can be launched by each MPI task running on a CPU.
|
||||
|
||||
To build with Kokkos support for NVIDIA GPUs, NVIDIA Cuda software
|
||||
version 6.5 or later must be installed on your system. See the
|
||||
version 7.5 or later must be installed on your system. See the
|
||||
discussion for the "GPU"_accelerate_gpu.html package for details of
|
||||
how to check and do this.
|
||||
|
||||
|
||||
@ -91,6 +91,7 @@ Commands :h1
|
||||
suffix
|
||||
tad
|
||||
temper
|
||||
temper_grem
|
||||
thermo
|
||||
thermo_modify
|
||||
thermo_style
|
||||
|
||||
@ -10,22 +10,34 @@ compute coord/atom command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
compute ID group-ID coord/atom cutoff type1 type2 ... :pre
|
||||
compute ID group-ID coord/atom cstyle args ... :pre
|
||||
|
||||
ID, group-ID are documented in "compute"_compute.html command
|
||||
coord/atom = style name of this compute command
|
||||
cutoff = distance within which to count coordination neighbors (distance units)
|
||||
typeN = atom type for Nth coordination count (see asterisk form below) :ul
|
||||
one cstyle must be appended :ul
|
||||
|
||||
cstyle = {cutoff} or {orientorder}
|
||||
|
||||
{cutoff} args = cutoff typeN
|
||||
cutoff = distance within which to count coordination neighbors (distance units)
|
||||
typeN = atom type for Nth coordination count (see asterisk form below) :pre
|
||||
|
||||
{orientorder} args = orientorderID threshold
|
||||
orientorderID = ID of a previously defined orientorder/atom compute
|
||||
threshold = minimum value of the scalar product between two 'connected' atoms (see text for explanation) :pre
|
||||
|
||||
[Examples:]
|
||||
|
||||
compute 1 all coord/atom 2.0
|
||||
compute 1 all coord/atom 6.0 1 2
|
||||
compute 1 all coord/atom 6.0 2*4 5*8 * :pre
|
||||
compute 1 all coord/atom cutoff 2.0
|
||||
compute 1 all coord/atom cutoff 6.0 1 2
|
||||
compute 1 all coord/atom cutoff 6.0 2*4 5*8 *
|
||||
compute 1 all coord/atom orientorder 2 0.5 :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
Define a computation that calculates one or more coordination numbers
|
||||
This compute performs generic calculations between neighboring atoms. So far,
|
||||
there are two cstyles implemented: {cutoff} and {orientorder}.
|
||||
The {cutoff} cstyle calculates one or more coordination numbers
|
||||
for each atom in a group.
|
||||
|
||||
A coordination number is defined as the number of neighbor atoms with
|
||||
@ -49,6 +61,14 @@ from 1 to N. A leading asterisk means all types from 1 to n
|
||||
(inclusive). A middle asterisk means all types from m to n
|
||||
(inclusive).
|
||||
|
||||
The {orientorder} cstyle calculates the number of 'connected' atoms j
|
||||
around each atom i. The atom j is connected to i if the scalar product
|
||||
({Ybar_lm(i)},{Ybar_lm(j)}) is larger than {threshold}. Thus, this cstyle
|
||||
will work only if a "compute orientorder/atom"_compute_orientorder_atom.html
|
||||
has been previously defined. This cstyle allows one to apply the
|
||||
ten Wolde's criterion to identify crystal-like atoms in a system
|
||||
(see "ten Wolde et al."_#tenWolde).
|
||||
|
||||
The value of all coordination numbers will be 0.0 for atoms not in the
|
||||
specified compute group.
|
||||
|
||||
@ -83,10 +103,19 @@ options.
|
||||
The per-atom vector or array values will be a number >= 0.0, as
|
||||
explained above.
|
||||
|
||||
[Restrictions:] none
|
||||
[Restrictions:]
|
||||
The cstyle {orientorder} can only be used if a
|
||||
"compute orientorder/atom"_compute_orientorder_atom.html command
|
||||
was previously defined. Otherwise, an error message will be issued.
|
||||
|
||||
[Related commands:]
|
||||
|
||||
"compute cluster/atom"_compute_cluster_atom.html
|
||||
"compute orientorder/atom"_compute_orientorder_atom.html
|
||||
|
||||
[Default:] none
|
||||
|
||||
:line
|
||||
|
||||
:link(tenWolde)
|
||||
[(tenWolde)] P. R. ten Wolde, M. J. Ruiz-Montero, D. Frenkel, J. Chem. Phys. 104, 9932 (1996).
|
||||
|
||||
@ -15,17 +15,19 @@ compute ID group-ID orientorder/atom keyword values ... :pre
|
||||
ID, group-ID are documented in "compute"_compute.html command :ulb,l
|
||||
orientorder/atom = style name of this compute command :l
|
||||
one or more keyword/value pairs may be appended :l
|
||||
keyword = {cutoff} or {nnn} or {degrees}
|
||||
keyword = {cutoff} or {nnn} or {degrees} or {components}
|
||||
{cutoff} value = distance cutoff
|
||||
{nnn} value = number of nearest neighbors
|
||||
{degrees} values = nlvalues, l1, l2,... :pre
|
||||
{degrees} values = nlvalues, l1, l2,...
|
||||
{components} value = l :pre
|
||||
|
||||
:ule
|
||||
|
||||
[Examples:]
|
||||
|
||||
compute 1 all orientorder/atom
|
||||
compute 1 all orientorder/atom degrees 5 4 6 8 10 12 nnn NULL cutoff 1.5 :pre
|
||||
compute 1 all orientorder/atom degrees 5 4 6 8 10 12 nnn NULL cutoff 1.5
|
||||
compute 1 all orientorder/atom degrees 4 6 components 6 nnn NULL cutoff 3.0 :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
@ -71,6 +73,13 @@ The numerical values of all order parameters up to {Q}12
|
||||
for a range of commonly encountered high-symmetry structures are given
|
||||
in Table I of "Mickel et al."_#Mickel.
|
||||
|
||||
The optional keyword {components} will output the components of
|
||||
the normalized complex vector {Ybar_lm} of degree {l}, which must be
|
||||
explicitly included in the keyword {degrees}. This option can be used
|
||||
in conjunction with "compute coord/atom"_compute_coord_atom.html to
|
||||
calculate the ten Wolde's criterion to identify crystal-like particles
|
||||
(see "ten Wolde et al."_#tenWolde96).
|
||||
|
||||
The value of {Ql} is set to zero for atoms not in the
|
||||
specified compute group, as well as for atoms that have less than
|
||||
{nnn} neighbors within the distance cutoff.
|
||||
@ -98,6 +107,12 @@ the neighbor list.
|
||||
This compute calculates a per-atom array with {nlvalues} columns, giving the
|
||||
{Ql} values for each atom, which are real numbers on the range 0 <= {Ql} <= 1.
|
||||
|
||||
If the keyword {components} is set, then the real and imaginary parts of each
|
||||
component of (normalized) {Ybar_lm} will be added to the output array in the
|
||||
following order:
|
||||
Re({Ybar_-m}) Im({Ybar_-m}) Re({Ybar_-m+1}) Im({Ybar_-m+1}) ... Re({Ybar_m}) Im({Ybar_m}).
|
||||
This way, the per-atom array will have a total of {nlvalues}+2*(2{l}+1) columns.
|
||||
|
||||
These values can be accessed by any command that uses
|
||||
per-atom values from a compute as input. See "Section
|
||||
6.15"_Section_howto.html#howto_15 for an overview of LAMMPS output
|
||||
@ -117,5 +132,9 @@ The option defaults are {cutoff} = pair style cutoff, {nnn} = 12, {degrees} = 5
|
||||
|
||||
:link(Steinhardt)
|
||||
[(Steinhardt)] P. Steinhardt, D. Nelson, and M. Ronchetti, Phys. Rev. B 28, 784 (1983).
|
||||
|
||||
:link(Mickel)
|
||||
[(Mickel)] W. Mickel, S. C. Kapfer, G. E. Schroeder-Turkand, K. Mecke, J. Chem. Phys. 138, 044501 (2013).
|
||||
|
||||
:link(tenWolde96)
|
||||
[(tenWolde)] P. R. ten Wolde, M. J. Ruiz-Montero, D. Frenkel, J. Chem. Phys. 104, 9932 (1996).
|
||||
|
||||
@ -35,6 +35,7 @@ Computes :h1
|
||||
compute_erotate_sphere_atom
|
||||
compute_event_displace
|
||||
compute_fep
|
||||
compute_global_atom
|
||||
compute_group_group
|
||||
compute_gyration
|
||||
compute_gyration_chunk
|
||||
|
||||
@ -151,7 +151,7 @@ The option default for the {energy} keyword is energy = no.
|
||||
:line
|
||||
|
||||
:link(Strong)
|
||||
[(Strong)] Strong and Eaves, J. Phys. Chem. Lett. 7, 1907 (2016).
|
||||
[(Strong)] Strong and Eaves, J. Phys. Chem. B 121, 189 (2017).
|
||||
|
||||
:link(Evans)
|
||||
[(Evans)] Evans and Morriss, Phys. Rev. Lett. 56, 2172 (1986).
|
||||
|
||||
@ -29,7 +29,7 @@ fix fxgREM all grem 502 -0.15 -80000 fxnvt :pre
|
||||
[Description:]
|
||||
|
||||
This fix implements the molecular dynamics version of the generalized
|
||||
replica exchange method (gREM) originally developed by "(Kim)"_#Kim,
|
||||
replica exchange method (gREM) originally developed by "(Kim)"_#Kim2010,
|
||||
which uses non-Boltzmann ensembles to sample over first order phase
|
||||
transitions. This is done by defining replicas with an enthalpy
|
||||
dependent effective temperature
|
||||
@ -103,7 +103,7 @@ npt"_fix_nh.html, "thermo_modify"_thermo_modify.html
|
||||
|
||||
:line
|
||||
|
||||
:link(Kim)
|
||||
:link(Kim2010)
|
||||
[(Kim)] Kim, Keyes, Straub, J Chem. Phys, 132, 224107 (2010).
|
||||
|
||||
:link(Malolepsza)
|
||||
|
||||
130
doc/src/fix_mscg.txt
Normal file
130
doc/src/fix_mscg.txt
Normal file
@ -0,0 +1,130 @@
|
||||
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
|
||||
|
||||
:link(lws,http://lammps.sandia.gov)
|
||||
:link(ld,Manual.html)
|
||||
:link(lc,Section_commands.html#comm)
|
||||
|
||||
:line
|
||||
|
||||
fix mscg command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
fix ID group-ID mscg N keyword args ... :pre
|
||||
|
||||
ID, group-ID are documented in "fix"_fix.html command :ulb,l
|
||||
mscg = style name of this fix command :l
|
||||
N = invoke this fix every this many timesteps :l
|
||||
zero or more keyword/value pairs may be appended :l
|
||||
keyword = {range} or {name} or {max} :l
|
||||
{range} arg = {on} or {off}
|
||||
{on} = range finding functionality is performed
|
||||
{off} = force matching functionality is performed
|
||||
{name} args = name1 ... nameN
|
||||
name1,...,nameN = string names for each atom type (1-Ntype)
|
||||
{max} args = maxb maxa maxd
|
||||
maxb,maxa,maxd = maximum bonds/angles/dihedrals per atom :pre
|
||||
:ule
|
||||
|
||||
[Examples:]
|
||||
|
||||
fix 1 all mscg 1
|
||||
fix 1 all mscg 1 range name A B
|
||||
fix 1 all mscg 1 max 4 8 20 :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
This fix applies the Multi-Scale Coarse-Graining (MSCG) method to
|
||||
snapshots from a dump file to generate potentials for coarse-grained
|
||||
simulations from all-atom simulations, using a force-matching
|
||||
technique ("Izvekov"_#Izvekov, "Noid"_#Noid).
|
||||
|
||||
It makes use of the MS-CG library, written and maintained by Greg
|
||||
Voth's group at the University of Chicago, which is freely available
|
||||
on their "MS-CG GitHub
|
||||
site"_https://github.com/uchicago-voth/MSCG-release. See instructions
|
||||
on obtaining and installing the MS-CG library in the src/MSCG/README
|
||||
file, which must be done before you build LAMMPS with this fix command
|
||||
and use the command in a LAMMPS input script.
|
||||
|
||||
An example script using this fix is provided in the examples/mscg
|
||||
directory.
|
||||
|
||||
The general workflow for using LAMMPS in conjunction with the MS-CG
|
||||
library to create a coarse-grained model and run coarse-grained
|
||||
simulations is as follows:
|
||||
|
||||
Perform all-atom simulations on the system to be coarse grained.
|
||||
Generate a trajectory mapped to the coarse-grained model.
|
||||
Create input files for the MS-CG library.
|
||||
Run the range finder functionality of the MS-CG library.
|
||||
Run the force matching functionality of the MS-CG library.
|
||||
Check the results of the force matching.
|
||||
Run coarse-grained simulations using the new coarse-grained potentials. :ol
|
||||
|
||||
This fix can perform the range finding and force matching steps 4 and
|
||||
5 of the above workflow when used in conjunction with the
|
||||
"rerun"_rerun.html command. It does not perform steps 1-3 and 6-7.
|
||||
|
||||
Step 2 can be performed using a Python script (what is the name?)
|
||||
provided with the MS-CG library which defines the coarse-grained model
|
||||
and converts a standard LAMMPS dump file for an all-atom simulation
|
||||
(step 1) into a LAMMPS dump file which has the positions of and forces
|
||||
on the coarse-grained beads.
|
||||
|
||||
In step 3, an input file named "control.in" is needed by the MS-CG
|
||||
library which sets parameters for the range finding and force matching
|
||||
functionalities. See the examples/mscg/control.in file as an example.
|
||||
And see the documentation provided with the MS-CG library for more
|
||||
info on this file.
|
||||
|
||||
When this fix is used to perform steps 4 and 5, the MS-CG library also
|
||||
produces additional output files. The range finder functionality
|
||||
(step 4) outputs files defining pair and bonded interaction ranges.
|
||||
The force matching functionality (step 5) outputs tabulated force
|
||||
files for every interaction in the system. Other diagnostic files can
|
||||
also be output depending on the parameters in the MS-CG library input
|
||||
script. Again, see the documentation provided with the MS-CG library
|
||||
for more info.
|
||||
|
||||
:line
|
||||
|
||||
The {range} keyword specifies which MS-CG library functionality should
|
||||
be invoked. If {on}, the step 4 range finder functionality is invoked.
|
||||
If {off}, the step 5 force matching functionality is invoked.
|
||||
|
||||
If the {name} keyword is used, string names are defined to associate
|
||||
with the integer atom types in LAMMPS. {Ntype} names must be
|
||||
provided, one for each atom type (1-Ntype).
|
||||
|
||||
The {max} keyword specifies the maximum number of bonds, angles, and
|
||||
dihedrals a bead can have in the coarse-grained model.
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
This fix is part of the MSCG package. It is only enabled if LAMMPS was
|
||||
built with that package. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
The MS-CG library uses C++11, which may not be supported by older
|
||||
compilers. The MS-CG library also has some additional numeric library
|
||||
dependencies, which are described in its documentation.
|
||||
|
||||
Currently, the MS-CG library is not setup to run in parallel with MPI,
|
||||
so this fix can only be used in a serial LAMMPS build and run
|
||||
on a single processor.
|
||||
|
||||
[Related commands:] none
|
||||
|
||||
[Default:]
|
||||
|
||||
The default keyword settings are range off, max 4 12 36.
|
||||
|
||||
:line
|
||||
|
||||
:link(Izvekov)
|
||||
[(Izvekov)] Izvekov, Voth, J Chem Phys 123, 134105 (2005).
|
||||
|
||||
:link(Noid)
|
||||
[(Noid)] Noid, Chu, Ayton, Krishna, Izvekov, Voth, Das, Andersen, J
|
||||
Chem Phys 128, 134105 (2008).
|
||||
@ -89,11 +89,7 @@ NOTE: The center of mass of a group of atoms is calculated in
|
||||
group can straddle a periodic boundary. See the "dump"_dump.html doc
|
||||
page for a discussion of unwrapped coordinates. It also means that a
|
||||
spring connecting two groups or a group and the tether point can cross
|
||||
a periodic boundary and its length be calculated correctly. One
|
||||
exception is for rigid bodies, which should not be used with the fix
|
||||
spring command, if the rigid body will cross a periodic boundary.
|
||||
This is because image flags for rigid bodies are used in a different
|
||||
way, as explained on the "fix rigid"_fix_rigid.html doc page.
|
||||
a periodic boundary and its length be calculated correctly.
|
||||
|
||||
[Restart, fix_modify, output, run start/stop, minimize info:]
|
||||
|
||||
|
||||
@ -68,6 +68,7 @@ Fixes :h1
|
||||
fix_meso_stationary
|
||||
fix_momentum
|
||||
fix_move
|
||||
fix_mscg
|
||||
fix_msst
|
||||
fix_neb
|
||||
fix_nh
|
||||
|
||||
@ -23,6 +23,7 @@ Section_history.html
|
||||
|
||||
tutorial_drude.html
|
||||
tutorial_github.html
|
||||
tutorial_pylammps.html
|
||||
|
||||
body.html
|
||||
manifolds.html
|
||||
@ -113,6 +114,7 @@ special_bonds.html
|
||||
suffix.html
|
||||
tad.html
|
||||
temper.html
|
||||
temper_grem.html
|
||||
thermo.html
|
||||
thermo_modify.html
|
||||
thermo_style.html
|
||||
@ -192,6 +194,7 @@ fix_meso.html
|
||||
fix_meso_stationary.html
|
||||
fix_momentum.html
|
||||
fix_move.html
|
||||
fix_mscg.html
|
||||
fix_msst.html
|
||||
fix_neb.html
|
||||
fix_nh.html
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
|
||||
pair_style eam command :h3
|
||||
pair_style eam/gpu command :h3
|
||||
pair_style eam/intel command :h3
|
||||
pair_style eam/kk command :h3
|
||||
pair_style eam/omp command :h3
|
||||
pair_style eam/opt command :h3
|
||||
|
||||
@ -15,11 +15,12 @@ read_dump file Nstep field1 field2 ... keyword values ... :pre
|
||||
file = name of dump file to read :ulb,l
|
||||
Nstep = snapshot timestep to read from file :l
|
||||
one or more fields may be appended :l
|
||||
field = {x} or {y} or {z} or {vx} or {vy} or {vz} or {q} or {ix} or {iy} or {iz}
|
||||
field = {x} or {y} or {z} or {vx} or {vy} or {vz} or {q} or {ix} or {iy} or {iz} or {fx} or {fy} or {fz}
|
||||
{x},{y},{z} = atom coordinates
|
||||
{vx},{vy},{vz} = velocity components
|
||||
{q} = charge
|
||||
{ix},{iy},{iz} = image flags in each dimension :pre
|
||||
{ix},{iy},{iz} = image flags in each dimension
|
||||
{fx},{fy},{fz} = force components :pre
|
||||
zero or more keyword/value pairs may be appended :l
|
||||
keyword = {box} or {replace} or {purge} or {trim} or {add} or {label} or {scaled} or {wrapped} or {format} :l
|
||||
{box} value = {yes} or {no} = replace simulation box with dump box
|
||||
|
||||
@ -32,7 +32,7 @@ Run a parallel tempering or replica exchange simulation in LAMMPS
|
||||
partition mode using multiple generalized replicas (ensembles) of a
|
||||
system defined by "fix grem"_fix_grem.html, which stands for the
|
||||
generalized replica exchange method (gREM) originally developed by
|
||||
"(Kim)"_#Kim. It uses non-Boltzmann ensembles to sample over first
|
||||
"(Kim)"_#KimStraub. It uses non-Boltzmann ensembles to sample over first
|
||||
order phase transitions. This is done by defining replicas with an
|
||||
enthalpy dependent effective temperature
|
||||
|
||||
@ -105,5 +105,5 @@ This command must be used with "fix grem"_fix_grem.html.
|
||||
|
||||
[Default:] none
|
||||
|
||||
:link(Kim)
|
||||
:link(KimStraub)
|
||||
[(Kim)] Kim, Keyes, Straub, J Chem Phys, 132, 224107 (2010).
|
||||
|
||||
@ -33,14 +33,14 @@ timer loop :pre
|
||||
Select the level of detail at which LAMMPS performs its CPU timings.
|
||||
Multiple keywords can be specified with the {timer} command. For
|
||||
keywords that are mutually exclusive, the last one specified takes
|
||||
effect.
|
||||
precedence.
|
||||
|
||||
During a simulation run LAMMPS collects information about how much
|
||||
time is spent in different sections of the code and thus can provide
|
||||
information for determining performance and load imbalance problems.
|
||||
This can be done at different levels of detail and accuracy. For more
|
||||
information about the timing output, see this "discussion of screen
|
||||
output"_Section_start.html#start_8.
|
||||
output in Section 2.8"_Section_start.html#start_8.
|
||||
|
||||
The {off} setting will turn all time measurements off. The {loop}
|
||||
setting will only measure the total time for a run and not collect any
|
||||
@ -52,20 +52,22 @@ procsessors. The {full} setting adds information about CPU
|
||||
utilization and thread utilization, when multi-threading is enabled.
|
||||
|
||||
With the {sync} setting, all MPI tasks are synchronized at each timer
|
||||
call which meaures load imbalance more accuractly, though it can also
|
||||
slow down the simulation. Using the {nosync} setting (which is the
|
||||
default) turns off this synchronization.
|
||||
call which measures load imbalance for each section more accurately,
|
||||
though it can also slow down the simulation by prohibiting overlapping
|
||||
independent computations on different MPI ranks. Using the {nosync}
|
||||
setting (which is the default) turns this synchronization off.
|
||||
|
||||
With the {timeout} keyword a walltime limit can be imposed that
|
||||
With the {timeout} keyword a walltime limit can be imposed, that
|
||||
affects the "run"_run.html and "minimize"_minimize.html commands.
|
||||
This can be convenient when runs have to confirm to time limits,
|
||||
e.g. when running under a batch system and you want to maximize
|
||||
the utilization of the batch time slot, especially when the time
|
||||
per timestep varies and is thus difficult to predict how many
|
||||
steps a simulation can perform, or for difficult to converge
|
||||
minimizations. The timeout {elapse} value should be somewhat smaller
|
||||
than the time requested from the batch system, as there is usually
|
||||
some overhead to launch jobs, and it may be advisable to write
|
||||
This can be convenient when calculations have to comply with execution
|
||||
time limits, e.g. when running under a batch system when you want to
|
||||
maximize the utilization of the batch time slot, especially for runs
|
||||
where the time per timestep varies much and thus it becomes difficult
|
||||
to predict how many steps a simulation can perform for a given walltime
|
||||
limit. This also applies for difficult to converge minimizations.
|
||||
The timeout {elapse} value should be somewhat smaller than the maximum
|
||||
wall time requested from the batch system, as there is usually
|
||||
some overhead to launch jobs, and it is advisable to write
|
||||
out a restart after terminating a run due to a timeout.
|
||||
|
||||
The timeout timer starts when the command is issued. When the time
|
||||
|
||||
@ -336,12 +336,15 @@ commit and push again:
|
||||
$ git commit -m "Merged Axel's suggestions and updated text"
|
||||
$ git push git@github.com:Pakketeretet2/lammps :pre
|
||||
|
||||
This merge also shows up on the lammps Github page:
|
||||
|
||||
:c,image(JPG/tutorial_reverse_pull_request7.png)
|
||||
|
||||
:line
|
||||
|
||||
[After a merge]
|
||||
|
||||
When everything is fine, the feature branch is merged into the master branch.
|
||||
When everything is fine, the feature branch is merged into the master branch:
|
||||
|
||||
:c,image(JPG/tutorial_merged.png)
|
||||
|
||||
|
||||
@ -82,6 +82,7 @@ meam: MEAM test for SiC and shear (same as shear examples)
|
||||
melt: rapid melt of 3d LJ system
|
||||
micelle: self-assembly of small lipid-like molecules into 2d bilayers
|
||||
min: energy minimization of 2d LJ melt
|
||||
mscg: parameterize a multi-scale coarse-graining (MSCG) model
|
||||
msst: MSST shock dynamics
|
||||
nb3b: use of nonbonded 3-body harmonic pair style
|
||||
neb: nudged elastic band (NEB) calculation for barrier finding
|
||||
|
||||
@ -18,7 +18,7 @@ neigh_modify every 1 delay 0 check no once no
|
||||
timestep 0.001
|
||||
|
||||
compute dpdU all dpd
|
||||
variable totEnergy equal pe+ke+c_dpdU[1]+c_dpdU[1]+press*vol
|
||||
variable totEnergy equal pe+ke+c_dpdU[1]+c_dpdU[2]+press*vol
|
||||
|
||||
thermo 1
|
||||
thermo_style custom step temp press vol pe ke v_totEnergy cella cellb cellc
|
||||
|
||||
@ -22,7 +22,7 @@ neigh_modify every 1 delay 0 check no once no
|
||||
timestep 0.001
|
||||
|
||||
compute dpdU all dpd
|
||||
variable totEnergy equal pe+ke+c_dpdU[1]+c_dpdU[1]+press*vol
|
||||
variable totEnergy equal pe+ke+c_dpdU[1]+c_dpdU[2]+press*vol
|
||||
|
||||
thermo 1
|
||||
thermo_style custom step temp press vol pe ke v_totEnergy cella cellb cellc
|
||||
@ -34,129 +34,137 @@ fix 2 all eos/cv 0.0005
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
1 neighbor list requests
|
||||
update every 1 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 22 22 22
|
||||
Memory usage per processor = 6.48143 Mbytes
|
||||
binsize = 6, bins = 22 22 22
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair dpd/fdt/energy, perpetual
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/3d/newton
|
||||
bin: standard
|
||||
(2) fix shardlow, perpetual, ssa
|
||||
pair build: half/bin/newton/ssa
|
||||
stencil: half/bin/3d/newton/ssa
|
||||
bin: ssa
|
||||
Memory usage per processor = 8.55503 Mbytes
|
||||
Step Temp Press Volume PotEng KinEng v_totEnergy Cella Cellb Cellc
|
||||
0 239.4274282976 2817.4421750949 2146689.0000000000 2639.8225470740 313.3218455755 6048176597.3066043854 129.0000000000 129.0000000000 129.0000000000
|
||||
1 239.4771405316 2817.4798146419 2146689.0000581890 2639.8304543632 313.3869004818 6048257397.9450111389 129.0000000012 129.0000000012 129.0000000012
|
||||
2 239.5643955010 2817.5423194969 2146689.0002327557 2639.8379071907 313.5010849268 6048391577.0431985855 129.0000000047 129.0000000047 129.0000000047
|
||||
3 239.6633839196 2817.6123662396 2146689.0005237064 2639.8445238058 313.6306241122 6048541946.5712032318 129.0000000105 129.0000000105 129.0000000105
|
||||
4 239.5371222027 2817.5355424336 2146689.0009310376 2639.8505035043 313.4653942786 6048377030.7404460907 129.0000000186 129.0000000186 129.0000000186
|
||||
5 239.6512678169 2817.6153097076 2146689.0014547524 2639.8561498340 313.6147686202 6048548267.9007377625 129.0000000291 129.0000000291 129.0000000291
|
||||
6 239.5617886781 2817.5624195435 2146689.0020948485 2639.8617493725 313.4976735610 6048434730.8592004776 129.0000000420 129.0000000420 129.0000000420
|
||||
7 239.5228587856 2817.5420009502 2146689.0028513218 2639.8666590407 313.4467287471 6048390900.5748577118 129.0000000571 129.0000000571 129.0000000571
|
||||
8 239.6066877934 2817.6008649264 2146689.0037241788 2639.8710757645 313.5564298772 6048517265.7987136841 129.0000000746 129.0000000746 129.0000000746
|
||||
9 239.5719861485 2817.5823530300 2146689.0047134170 2639.8752557893 313.5110182737 6048477529.2603597641 129.0000000944 129.0000000944 129.0000000944
|
||||
10 239.5800176776 2817.5915671176 2146689.0058190385 2639.8793778438 313.5215285712 6048497312.1706552505 129.0000001166 129.0000001166 129.0000001166
|
||||
11 239.6299830954 2817.6281223139 2146689.0070410441 2639.8829762049 313.5869148014 6048575788.3208351135 129.0000001410 129.0000001410 129.0000001410
|
||||
12 239.6011995911 2817.6132377273 2146689.0083794324 2639.8860704236 313.5492478526 6048543839.4788360596 129.0000001678 129.0000001678 129.0000001678
|
||||
13 239.6407681166 2817.6427924824 2146689.0098342048 2639.8889816934 313.6010284005 6048607288.5005025864 129.0000001970 129.0000001970 129.0000001970
|
||||
14 239.6981172055 2817.6844100046 2146689.0114053637 2639.8913405110 313.6760771219 6048696632.8825626373 129.0000002285 129.0000002285 129.0000002285
|
||||
15 239.8563971968 2817.7922519039 2146689.0130929090 2639.8934358481 313.8832070208 6048928140.8671455383 129.0000002623 129.0000002623 129.0000002623
|
||||
16 239.8561894618 2817.7971208197 2146689.0148968464 2639.8950496967 313.8829351726 6048938597.9994916916 129.0000002984 129.0000002984 129.0000002984
|
||||
17 239.8816520361 2817.8185621543 2146689.0168171758 2639.8961257823 313.9162562538 6048984631.3226108551 129.0000003369 129.0000003369 129.0000003369
|
||||
18 239.9099966096 2817.8417368960 2146689.0188538977 2639.8965743204 313.9533488047 6049034386.0627622604 129.0000003777 129.0000003777 129.0000003777
|
||||
19 240.0514024347 2817.9389205774 2146689.0210070144 2639.8966103811 314.1383966683 6049243015.4568052292 129.0000004208 129.0000004208 129.0000004208
|
||||
20 239.8802541140 2817.8327386176 2146689.0232765260 2639.8962085210 313.9144268914 6049015081.9802341461 129.0000004662 129.0000004662 129.0000004662
|
||||
21 239.8462621903 2817.8160306167 2146689.0256624296 2639.8953174755 313.8699440502 6048979221.7758703232 129.0000005140 129.0000005140 129.0000005140
|
||||
22 240.0487944678 2817.9533849157 2146689.0281647225 2639.8938590354 314.1349838054 6049274086.0571212769 129.0000005642 129.0000005642 129.0000005642
|
||||
23 240.0966314441 2817.9897873787 2146689.0307834130 2639.8918104774 314.1975846937 6049352238.2649183273 129.0000006166 129.0000006166 129.0000006166
|
||||
24 240.1765312516 2818.0463843765 2146689.0335185044 2639.8891292321 314.3021439554 6049473742.2287187576 129.0000006714 129.0000006714 129.0000006714
|
||||
25 240.1500705973 2818.0336048048 2146689.0363699966 2639.8858785483 314.2675167572 6049446316.4600162506 129.0000007285 129.0000007285 129.0000007285
|
||||
26 240.2681423500 2818.1151708195 2146689.0393378921 2639.8825176506 314.4220289603 6049621421.8445177078 129.0000007880 129.0000007880 129.0000007880
|
||||
27 240.4728815247 2818.2527327079 2146689.0424221945 2639.8784158747 314.6899567267 6049916733.3989181519 129.0000008498 129.0000008498 129.0000008498
|
||||
28 240.4793027032 2818.2613348477 2146689.0456229053 2639.8736089473 314.6983596717 6049935208.5421981812 129.0000009139 129.0000009139 129.0000009139
|
||||
29 240.5020619198 2818.2805472685 2146689.0489400285 2639.8681043704 314.7281430587 6049976461.0082206726 129.0000009803 129.0000009803 129.0000009803
|
||||
30 240.5513721776 2818.3167157263 2146689.0523735629 2639.8623484053 314.7926719270 6050054113.1760177612 129.0000010491 129.0000010491 129.0000010491
|
||||
31 240.7340393104 2818.4391703712 2146689.0559235099 2639.8563442170 315.0317155636 6050316995.4599781036 129.0000011202 129.0000011202 129.0000011202
|
||||
32 240.8254719483 2818.5014640740 2146689.0595898777 2639.8498122053 315.1513670299 6050450731.1168394089 129.0000011936 129.0000011936 129.0000011936
|
||||
33 240.9681573541 2818.5965480750 2146689.0633726656 2639.8425779528 315.3380893908 6050654857.7432861328 129.0000012694 129.0000012694 129.0000012694
|
||||
34 241.0039494187 2818.6217008564 2146689.0672718794 2639.8347174393 315.3849279499 6050708863.9733209610 129.0000013475 129.0000013475 129.0000013475
|
||||
35 241.0314566197 2818.6411150538 2146689.0712875174 2639.8262983643 315.4209246902 6050750551.5649127960 129.0000014279 129.0000014279 129.0000014279
|
||||
36 241.0829173424 2818.6763455617 2146689.0754195810 2639.8174397481 315.4882677207 6050826192.2165899277 129.0000015107 129.0000015107 129.0000015107
|
||||
37 241.2845682012 2818.8087982181 2146689.0796680767 2639.8080129872 315.7521540252 6051110539.1171846390 129.0000015958 129.0000015958 129.0000015958
|
||||
38 241.3214712920 2818.8336260248 2146689.0840330068 2639.7981963574 315.8004465062 6051163849.0412235260 129.0000016833 129.0000016833 129.0000016833
|
||||
39 241.3392127125 2818.8456991528 2146689.0885143690 2639.7879618658 315.8236634561 6051189778.9386901855 129.0000017730 129.0000017730 129.0000017730
|
||||
40 241.5383770555 2818.9753950055 2146689.0931121684 2639.7769824244 316.0842958321 6051468208.8210506439 129.0000018651 129.0000018651 129.0000018651
|
||||
41 241.5059730674 2818.9543817992 2146689.0978264087 2639.7656512498 316.0418910106 6051423113.2358427048 129.0000019595 129.0000019595 129.0000019595
|
||||
42 241.3907605672 2818.8793800508 2146689.1026570834 2639.7541331920 315.8911205101 6051262121.2551422119 129.0000020563 129.0000020563 129.0000020563
|
||||
43 241.5095917610 2818.9559595711 2146689.1076041958 2639.7424355740 316.0466265406 6051426527.7663059235 129.0000021554 129.0000021554 129.0000021554
|
||||
44 241.6271631762 2819.0312325531 2146689.1126677482 2639.7297705654 316.2004839873 6051588129.8722610474 129.0000022568 129.0000022568 129.0000022568
|
||||
45 241.5702411838 2818.9923790176 2146689.1178477411 2639.7163554760 316.1259941770 6051504737.9250564575 129.0000023606 129.0000023606 129.0000023606
|
||||
46 241.7029985068 2819.0771124986 2146689.1231441777 2639.7024246704 316.2997243538 6051686649.4576120377 129.0000024667 129.0000024667 129.0000024667
|
||||
47 241.7966144965 2819.1357830868 2146689.1285570571 2639.6882106593 316.4222330191 6051812612.3391046524 129.0000025751 129.0000025751 129.0000025751
|
||||
48 241.8573480255 2819.1726205120 2146689.1340863821 2639.6735287925 316.5017107195 6051891706.4921989441 129.0000026859 129.0000026859 129.0000026859
|
||||
49 241.9611147338 2819.2374095379 2146689.1397321564 2639.6583357477 316.6375029166 6052030804.4275226593 129.0000027990 129.0000027990 129.0000027990
|
||||
50 242.1023518806 2819.3259059811 2146689.1454943856 2639.6424863169 316.8223300428 6052220795.1955394745 129.0000029144 129.0000029144 129.0000029144
|
||||
51 242.1174105473 2819.3319633044 2146689.1513730693 2639.6264141131 316.8420362613 6052233814.9634265900 129.0000030321 129.0000030321 129.0000030321
|
||||
52 242.2534914901 2819.4164594322 2146689.1573682069 2639.6098392670 317.0201158259 6052415218.9485445023 129.0000031522 129.0000031522 129.0000031522
|
||||
53 242.3504633236 2819.4754119996 2146689.1634798055 2639.5930076506 317.1470160479 6052541789.1274013519 129.0000032746 129.0000032746 129.0000032746
|
||||
54 242.2982323323 2819.4368568264 2146689.1697078613 2639.5756353782 317.0786650211 6052459040.6286897659 129.0000033994 129.0000033994 129.0000033994
|
||||
55 242.3452896272 2819.4623310219 2146689.1760523771 2639.5575918586 317.1402455951 6052513743.7400159836 129.0000035265 129.0000035265 129.0000035265
|
||||
56 242.4181903333 2819.5048897011 2146689.1825133534 2639.5390347547 317.2356456249 6052605122.2894439697 129.0000036559 129.0000036559 129.0000036559
|
||||
57 242.5317091656 2819.5739975787 2146689.1890907930 2639.5199828249 317.3841997413 6052753494.0979280472 129.0000037876 129.0000037876 129.0000037876
|
||||
58 242.5478978740 2819.5796954935 2146689.1957846982 2639.5006137388 317.4053847660 6052765744.6257629395 129.0000039217 129.0000039217 129.0000039217
|
||||
59 242.6655316466 2819.6519225743 2146689.2025950695 2639.4808234811 317.5593238156 6052920813.0568208694 129.0000040582 129.0000040582 129.0000040582
|
||||
60 242.8126131177 2819.7431588157 2146689.2095219092 2639.4607996998 317.7517989980 6053116688.6155729294 129.0000041969 129.0000041969 129.0000041969
|
||||
61 242.7957124913 2819.7275989047 2146689.2165652174 2639.4406312730 317.7296823362 6053083306.1403274536 129.0000043380 129.0000043380 129.0000043380
|
||||
62 242.9276177041 2819.8088790098 2146689.2237249981 2639.4201279058 317.9022974164 6053257809.6067762375 129.0000044814 129.0000044814 129.0000044814
|
||||
63 243.0465445938 2819.8814758895 2146689.2310012528 2639.3991657500 318.0579286774 6053413673.1989650726 129.0000046272 129.0000046272 129.0000046272
|
||||
64 242.9890585501 2819.8387587817 2146689.2383939880 2639.3781767844 317.9827007328 6053321993.5937871933 129.0000047752 129.0000047752 129.0000047752
|
||||
65 242.9653746583 2819.8180104181 2146689.2459031967 2639.3568184374 317.9517072884 6053277474.4272727966 129.0000049256 129.0000049256 129.0000049256
|
||||
66 243.0259297024 2819.8514334947 2146689.2535288804 2639.3352568621 318.0309514181 6053349244.9473772049 129.0000050784 129.0000050784 129.0000050784
|
||||
67 242.9638979697 2819.8046112742 2146689.2612710390 2639.3134547096 317.9497748498 6053248753.9180717468 129.0000052335 129.0000052335 129.0000052335
|
||||
68 243.0283540775 2819.8395632725 2146689.2691296688 2639.2912303374 318.0341240273 6053323807.2197017670 129.0000053909 129.0000053909 129.0000053909
|
||||
69 243.2256418664 2819.9609646019 2146689.2771047787 2639.2684509205 318.2923006889 6053584440.8757400513 129.0000055506 129.0000055506 129.0000055506
|
||||
70 243.2507495334 2819.9706145524 2146689.2851963686 2639.2450126010 318.3251573278 6053605179.1483964920 129.0000057127 129.0000057127 129.0000057127
|
||||
71 243.4287155518 2820.0794853386 2146689.2934044413 2639.2213699915 318.5580489464 6053838914.2552747726 129.0000058771 129.0000058771 129.0000058771
|
||||
72 243.5097518574 2820.1249498194 2146689.3017290002 2639.1971212009 318.6640954635 6053936535.9274711609 129.0000060439 129.0000060439 129.0000060439
|
||||
73 243.5356790969 2820.1337977544 2146689.3101700447 2639.1723394661 318.6980246193 6053955553.5090074539 129.0000062130 129.0000062130 129.0000062130
|
||||
74 243.5479180498 2820.1331964183 2146689.3187275808 2639.1473868749 318.7140408766 6053954286.7515821457 129.0000063844 129.0000063844 129.0000063844
|
||||
75 243.7115573025 2820.2314361523 2146689.3274016059 2639.1220411207 318.9281840641 6054165201.5909118652 129.0000065581 129.0000065581 129.0000065581
|
||||
76 243.7457279618 2820.2454531429 2146689.3361921217 2639.0963868224 318.9729008040 6054195316.5254154205 129.0000067342 129.0000067342 129.0000067342
|
||||
77 243.8345031069 2820.2948644965 2146689.3450991292 2639.0700900389 319.0890745962 6054301412.5615310669 129.0000069126 129.0000069126 129.0000069126
|
||||
78 244.0193931195 2820.4067881628 2146689.3541226317 2639.0435094409 319.3310271594 6054541703.5689058304 129.0000070934 129.0000070934 129.0000070934
|
||||
79 243.9919100078 2820.3799166166 2146689.3632626338 2639.0164249037 319.2950619430 6054484044.4218587875 129.0000072765 129.0000072765 129.0000072765
|
||||
80 244.0965612207 2820.4387335935 2146689.3725191355 2638.9888176882 319.4320116291 6054610332.4174261093 129.0000074619 129.0000074619 129.0000074619
|
||||
81 244.1334315951 2820.4535208568 2146689.3818921377 2638.9608330195 319.4802612965 6054642102.5347270966 129.0000076496 129.0000076496 129.0000076496
|
||||
82 244.3029520408 2820.5543485196 2146689.3913816395 2638.9318525796 319.7021007878 6054858575.1664342880 129.0000078397 129.0000078397 129.0000078397
|
||||
83 244.3445761189 2820.5713690935 2146689.4009876498 2638.9021684795 319.7565712929 6054895140.1710596085 129.0000080321 129.0000080321 129.0000080321
|
||||
84 244.2696671559 2820.5125763350 2146689.4107101629 2638.8720941742 319.6585431986 6054768957.6739044189 129.0000082269 129.0000082269 129.0000082269
|
||||
85 244.5161919319 2820.6629431352 2146689.4205491822 2638.8415194387 319.9811528443 6055091776.5361995697 129.0000084240 129.0000084240 129.0000084240
|
||||
86 244.5641090282 2820.6838080201 2146689.4305047127 2638.8103612394 320.0438585800 6055136595.0767974854 129.0000086234 129.0000086234 129.0000086234
|
||||
87 244.5348240638 2820.6541129118 2146689.4405767513 2638.7789728309 320.0055354056 6055072877.2416200638 129.0000088251 129.0000088251 129.0000088251
|
||||
88 244.6939431427 2820.7468233396 2146689.4507653015 2638.7470269267 320.2137633592 6055271926.6536149979 129.0000090292 129.0000090292 129.0000090292
|
||||
89 244.8800201091 2820.8567117003 2146689.4610703662 2638.7147520097 320.4572692055 6055507852.1186332703 129.0000092356 129.0000092356 129.0000092356
|
||||
90 244.8804280382 2820.8451141876 2146689.4714919478 2638.6820441173 320.4578030336 6055482985.2258749008 129.0000094444 129.0000094444 129.0000094444
|
||||
91 244.9558851986 2820.8815975090 2146689.4820300462 2638.6491836104 320.5565485155 6055561333.3803453445 129.0000096555 129.0000096555 129.0000096555
|
||||
92 244.9965893140 2820.8949614294 2146689.4926846647 2638.6159817170 320.6098151301 6055590051.6433181763 129.0000098689 129.0000098689 129.0000098689
|
||||
93 245.1381056687 2820.9732811388 2146689.5034558061 2638.5824451870 320.7950076360 6055758210.2774200439 129.0000100846 129.0000100846 129.0000100846
|
||||
94 245.2954807041 2821.0619342131 2146689.5143434699 2638.5485198222 321.0009532826 6055948551.7882709503 129.0000103027 129.0000103027 129.0000103027
|
||||
95 245.3535822199 2821.0860553731 2146689.5253476589 2638.5144817512 321.0769866522 6056000363.5151576996 129.0000105232 129.0000105232 129.0000105232
|
||||
96 245.5013476026 2821.1682908185 2146689.5364683764 2638.4801107361 321.2703568219 6056176929.0169925690 129.0000107459 129.0000107459 129.0000107459
|
||||
97 245.4166531417 2821.0989038023 2146689.5477056229 2638.4453663061 321.1595231342 6056028008.1910057068 129.0000109710 129.0000109710 129.0000109710
|
||||
98 245.4121937790 2821.0817490953 2146689.5590593945 2638.4097762390 321.1536874797 6055991214.3494396210 129.0000111984 129.0000111984 129.0000111984
|
||||
99 245.4532592994 2821.0946353191 2146689.5705296928 2638.3738037546 321.2074270397 6056018909.4480972290 129.0000114282 129.0000114282 129.0000114282
|
||||
100 245.7500657390 2821.2735939427 2146689.5821165247 2638.3375549051 321.5958367642 6056403111.1006488800 129.0000116603 129.0000116603 129.0000116603
|
||||
Loop time of 4.05006 on 1 procs for 100 steps with 10125 atoms
|
||||
0 239.4274282976 2817.4421750949 2146689.0000000000 2639.8225470740 313.3218455755 6048176597.3066034317 129.0000000000 129.0000000000 129.0000000000
|
||||
1 239.4771405316 2817.4798146419 2146689.0000581890 2639.8304543632 313.3869004818 6048257397.8720483780 129.0000000012 129.0000000012 129.0000000012
|
||||
2 239.5643955010 2817.5423194969 2146689.0002327557 2639.8379071907 313.5010849268 6048391576.8485937119 129.0000000047 129.0000000047 129.0000000047
|
||||
3 239.6633839196 2817.6123662396 2146689.0005237064 2639.8445238058 313.6306241122 6048541946.2404479980 129.0000000105 129.0000000105 129.0000000105
|
||||
4 239.5371222027 2817.5355424336 2146689.0009310376 2639.8505035043 313.4653942786 6048377030.5689325333 129.0000000186 129.0000000186 129.0000000186
|
||||
5 239.6512678169 2817.6153097076 2146689.0014547524 2639.8561498340 313.6147686202 6048548267.5742130280 129.0000000291 129.0000000291 129.0000000291
|
||||
6 239.5617886781 2817.5624195435 2146689.0020948485 2639.8617493725 313.4976735610 6048434730.6441593170 129.0000000420 129.0000000420 129.0000000420
|
||||
7 239.5228587856 2817.5420009502 2146689.0028513218 2639.8666590407 313.4467287471 6048390900.4058599472 129.0000000571 129.0000000571 129.0000000571
|
||||
8 239.6066877934 2817.6008649264 2146689.0037241788 2639.8710757645 313.5564298772 6048517265.5155982971 129.0000000746 129.0000000746 129.0000000746
|
||||
9 239.5719861485 2817.5823530300 2146689.0047134170 2639.8752557893 313.5110182737 6048477529.0184717178 129.0000000944 129.0000000944 129.0000000944
|
||||
10 239.5800176776 2817.5915671176 2146689.0058190385 2639.8793778438 313.5215285712 6048497311.9141387939 129.0000001166 129.0000001166 129.0000001166
|
||||
11 239.6299830954 2817.6281223139 2146689.0070410441 2639.8829762049 313.5869148014 6048575787.9953098297 129.0000001410 129.0000001410 129.0000001410
|
||||
12 239.6011995911 2817.6132377273 2146689.0083794324 2639.8860704236 313.5492478526 6048543839.1878814697 129.0000001678 129.0000001678 129.0000001678
|
||||
13 239.6407681166 2817.6427924824 2146689.0098342048 2639.8889816934 313.6010284005 6048607288.1548709869 129.0000001970 129.0000001970 129.0000001970
|
||||
14 239.6981172055 2817.6844100046 2146689.0114053637 2639.8913405110 313.6760771219 6048696632.4595127106 129.0000002285 129.0000002285 129.0000002285
|
||||
15 239.8563971968 2817.7922519039 2146689.0130929090 2639.8934358481 313.8832070208 6048928140.2348766327 129.0000002623 129.0000002623 129.0000002623
|
||||
16 239.8561894618 2817.7971208196 2146689.0148968464 2639.8950496967 313.8829351726 6048938597.3658657074 129.0000002984 129.0000002984 129.0000002984
|
||||
17 239.8816520361 2817.8185621543 2146689.0168171758 2639.8961257823 313.9162562538 6048984630.6545839310 129.0000003369 129.0000003369 129.0000003369
|
||||
18 239.9099966096 2817.8417368960 2146689.0188538977 2639.8965743204 313.9533488047 6049034385.3571958542 129.0000003777 129.0000003777 129.0000003777
|
||||
19 240.0514024347 2817.9389205774 2146689.0210070144 2639.8966103811 314.1383966683 6049243014.5661621094 129.0000004208 129.0000004208 129.0000004208
|
||||
20 239.8802541140 2817.8327386176 2146689.0232765260 2639.8962085210 313.9144268914 6049015081.3139505386 129.0000004662 129.0000004662 129.0000004662
|
||||
21 239.8462621903 2817.8160306167 2146689.0256624296 2639.8953174755 313.8699440502 6048979221.1549577713 129.0000005140 129.0000005140 129.0000005140
|
||||
22 240.0487944678 2817.9533849157 2146689.0281647225 2639.8938590354 314.1349838054 6049274085.1726217270 129.0000005642 129.0000005642 129.0000005642
|
||||
23 240.0966314441 2817.9897873787 2146689.0307834130 2639.8918104774 314.1975846937 6049352237.3198652267 129.0000006166 129.0000006166 129.0000006166
|
||||
24 240.1765312516 2818.0463843765 2146689.0335185044 2639.8891292321 314.3021439554 6049473741.1817827225 129.0000006714 129.0000006714 129.0000006714
|
||||
25 240.1500705973 2818.0336048048 2146689.0363699966 2639.8858785483 314.2675167572 6049446315.4509468079 129.0000007285 129.0000007285 129.0000007285
|
||||
26 240.2681423500 2818.1151708195 2146689.0393378921 2639.8825176506 314.4220289603 6049621420.6842966080 129.0000007880 129.0000007880 129.0000007880
|
||||
27 240.4728815247 2818.2527327079 2146689.0424221945 2639.8784158747 314.6899567267 6049916731.9748563766 129.0000008498 129.0000008498 129.0000008498
|
||||
28 240.4793027032 2818.2613348477 2146689.0456229053 2639.8736089473 314.6983596717 6049935207.1145420074 129.0000009139 129.0000009139 129.0000009139
|
||||
29 240.5020619198 2818.2805472685 2146689.0489400285 2639.8681043704 314.7281430587 6049976459.5562763214 129.0000009803 129.0000009803 129.0000009803
|
||||
30 240.5513721776 2818.3167157263 2146689.0523735629 2639.8623484053 314.7926719270 6050054111.6652946472 129.0000010491 129.0000010491 129.0000010491
|
||||
31 240.7340393104 2818.4391703712 2146689.0559235099 2639.8563442170 315.0317155636 6050316993.7162160873 129.0000011202 129.0000011202 129.0000011202
|
||||
32 240.8254719483 2818.5014640740 2146689.0595898777 2639.8498122053 315.1513670299 6050450729.2599506378 129.0000011936 129.0000011936 129.0000011936
|
||||
33 240.9681573541 2818.5965480750 2146689.0633726656 2639.8425779528 315.3380893908 6050654855.7068986893 129.0000012694 129.0000012694 129.0000012694
|
||||
34 241.0039494187 2818.6217008564 2146689.0672718794 2639.8347174393 315.3849279499 6050708861.8979463577 129.0000013475 129.0000013475 129.0000013475
|
||||
35 241.0314566197 2818.6411150538 2146689.0712875174 2639.8262983643 315.4209246902 6050750549.4619541168 129.0000014279 129.0000014279 129.0000014279
|
||||
36 241.0829173424 2818.6763455617 2146689.0754195810 2639.8174397481 315.4882677207 6050826190.0551443100 129.0000015107 129.0000015107 129.0000015107
|
||||
37 241.2845682012 2818.8087982181 2146689.0796680767 2639.8080129872 315.7521540252 6051110536.7012710571 129.0000015958 129.0000015958 129.0000015958
|
||||
38 241.3214712920 2818.8336260248 2146689.0840330068 2639.7981963574 315.8004465062 6051163846.5868301392 129.0000016833 129.0000016833 129.0000016833
|
||||
39 241.3392127125 2818.8456991528 2146689.0885143690 2639.7879618658 315.8236634561 6051189776.4712991714 129.0000017730 129.0000017730 129.0000017730
|
||||
40 241.5383770555 2818.9753950055 2146689.0931121684 2639.7769824244 316.0842958321 6051468206.1039972305 129.0000018651 129.0000018651 129.0000018651
|
||||
41 241.5059730674 2818.9543817992 2146689.0978264087 2639.7656512498 316.0418910106 6051423110.5725250244 129.0000019595 129.0000019595 129.0000019595
|
||||
42 241.3907605672 2818.8793800508 2146689.1026570834 2639.7541331920 315.8911205101 6051262118.7541017532 129.0000020563 129.0000020563 129.0000020563
|
||||
43 241.5095917610 2818.9559595711 2146689.1076041958 2639.7424355740 316.0466265406 6051426525.1214485168 129.0000021554 129.0000021554 129.0000021554
|
||||
44 241.6271631762 2819.0312325531 2146689.1126677482 2639.7297705654 316.2004839873 6051588127.0861988068 129.0000022568 129.0000022568 129.0000022568
|
||||
45 241.5702411838 2818.9923790176 2146689.1178477411 2639.7163554760 316.1259941770 6051504735.2269029617 129.0000023606 129.0000023606 129.0000023606
|
||||
46 241.7029985068 2819.0771124986 2146689.1231441777 2639.7024246704 316.2997243538 6051686646.5996389389 129.0000024667 129.0000024667 129.0000024667
|
||||
47 241.7966144965 2819.1357830868 2146689.1285570571 2639.6882106593 316.4222330191 6051812609.3728218079 129.0000025751 129.0000025751 129.0000025751
|
||||
48 241.8573480255 2819.1726205120 2146689.1340863821 2639.6735287925 316.5017107195 6051891703.4611186981 129.0000026859 129.0000026859 129.0000026859
|
||||
49 241.9611147338 2819.2374095379 2146689.1397321564 2639.6583357477 316.6375029166 6052030801.2758235931 129.0000027990 129.0000027990 129.0000027990
|
||||
50 242.1023518806 2819.3259059811 2146689.1454943856 2639.6424863169 316.8223300428 6052220791.8748512268 129.0000029144 129.0000029144 129.0000029144
|
||||
51 242.1174105473 2819.3319633044 2146689.1513730693 2639.6264141131 316.8420362613 6052233811.6391019821 129.0000030321 129.0000030321 129.0000030321
|
||||
52 242.2534914901 2819.4164594322 2146689.1573682069 2639.6098392671 317.0201158259 6052415215.4627037048 129.0000031522 129.0000031522 129.0000031522
|
||||
53 242.3504633236 2819.4754119996 2146689.1634798055 2639.5930076506 317.1470160479 6052541785.5314817429 129.0000032746 129.0000032746 129.0000032746
|
||||
54 242.2982323323 2819.4368568264 2146689.1697078613 2639.5756353782 317.0786650211 6052459037.1184797287 129.0000033994 129.0000033994 129.0000033994
|
||||
55 242.3452896272 2819.4623310219 2146689.1760523771 2639.5575918586 317.1402455951 6052513740.1862611771 129.0000035265 129.0000035265 129.0000035265
|
||||
56 242.4181903333 2819.5048897011 2146689.1825133534 2639.5390347547 317.2356456249 6052605118.6588287354 129.0000036559 129.0000036559 129.0000036559
|
||||
57 242.5317091656 2819.5739975787 2146689.1890907930 2639.5199828249 317.3841997413 6052753490.3378009796 129.0000037876 129.0000037876 129.0000037876
|
||||
58 242.5478978740 2819.5796954935 2146689.1957846982 2639.5006137388 317.4053847660 6052765740.8638200760 129.0000039217 129.0000039217 129.0000039217
|
||||
59 242.6655316466 2819.6519225743 2146689.2025950695 2639.4808234811 317.5593238156 6052920809.1607065201 129.0000040582 129.0000040582 129.0000040582
|
||||
60 242.8126131177 2819.7431588157 2146689.2095219092 2639.4607996998 317.7517989980 6053116684.5470046997 129.0000041969 129.0000041969 129.0000041969
|
||||
61 242.7957124913 2819.7275989047 2146689.2165652174 2639.4406312730 317.7296823362 6053083302.1140241623 129.0000043380 129.0000043380 129.0000043380
|
||||
62 242.9276177041 2819.8088790098 2146689.2237249981 2639.4201279058 317.9022974164 6053257805.4283437729 129.0000044814 129.0000044814 129.0000044814
|
||||
63 243.0465445938 2819.8814758895 2146689.2310012528 2639.3991657500 318.0579286774 6053413668.8858547211 129.0000046272 129.0000046272 129.0000046272
|
||||
64 242.9890585501 2819.8387587817 2146689.2383939880 2639.3781767844 317.9827007328 6053321989.3768787384 129.0000047752 129.0000047752 129.0000047752
|
||||
65 242.9653746583 2819.8180104181 2146689.2459031967 2639.3568184374 317.9517072884 6053277470.2627182007 129.0000049256 129.0000049256 129.0000049256
|
||||
66 243.0259297024 2819.8514334947 2146689.2535288804 2639.3352568621 318.0309514181 6053349240.7251205444 129.0000050784 129.0000050784 129.0000050784
|
||||
67 242.9638979697 2819.8046112742 2146689.2612710390 2639.3134547096 317.9497748498 6053248749.7987766266 129.0000052335 129.0000052335 129.0000052335
|
||||
68 243.0283540775 2819.8395632725 2146689.2691296688 2639.2912303374 318.0341240273 6053323803.0382738113 129.0000053909 129.0000053909 129.0000053909
|
||||
69 243.2256418664 2819.9609646019 2146689.2771047787 2639.2684509205 318.2923006889 6053584436.4588871002 129.0000055506 129.0000055506 129.0000055506
|
||||
70 243.2507495334 2819.9706145524 2146689.2851963686 2639.2450126010 318.3251573278 6053605174.7221174240 129.0000057127 129.0000057127 129.0000057127
|
||||
71 243.4287155518 2820.0794853386 2146689.2934044413 2639.2213699915 318.5580489464 6053838909.6197280884 129.0000058771 129.0000058771 129.0000058771
|
||||
72 243.5097518574 2820.1249498194 2146689.3017290002 2639.1971212009 318.6640954635 6053936531.2101163864 129.0000060439 129.0000060439 129.0000060439
|
||||
73 243.5356790969 2820.1337977544 2146689.3101700447 2639.1723394661 318.6980246193 6053955548.7824945450 129.0000062130 129.0000062130 129.0000062130
|
||||
74 243.5479180498 2820.1331964183 2146689.3187275808 2639.1473868749 318.7140408766 6053954282.0339813232 129.0000063844 129.0000063844 129.0000063844
|
||||
75 243.7115573025 2820.2314361523 2146689.3274016059 2639.1220411207 318.9281840641 6054165196.6845111847 129.0000065581 129.0000065581 129.0000065581
|
||||
76 243.7457279618 2820.2454531429 2146689.3361921217 2639.0963868224 318.9729008040 6054195311.5999307632 129.0000067342 129.0000067342 129.0000067342
|
||||
77 243.8345031069 2820.2948644965 2146689.3450991292 2639.0700900389 319.0890745962 6054301407.5461502075 129.0000069126 129.0000069126 129.0000069126
|
||||
78 244.0193931195 2820.4067881628 2146689.3541226317 2639.0435094409 319.3310271594 6054541698.3381366730 129.0000070934 129.0000070934 129.0000070934
|
||||
79 243.9919100078 2820.3799166166 2146689.3632626338 2639.0164249037 319.2950619430 6054484039.2541246414 129.0000072765 129.0000072765 129.0000072765
|
||||
80 244.0965612207 2820.4387335935 2146689.3725191355 2638.9888176882 319.4320116291 6054610327.1403293610 129.0000074619 129.0000074619 129.0000074619
|
||||
81 244.1334315951 2820.4535208568 2146689.3818921377 2638.9608330195 319.4802612965 6054642097.2373485565 129.0000076496 129.0000076496 129.0000076496
|
||||
82 244.3029520408 2820.5543485196 2146689.3913816395 2638.9318525796 319.7021007878 6054858569.6761827469 129.0000078397 129.0000078397 129.0000078397
|
||||
83 244.3445761189 2820.5713690935 2146689.4009876498 2638.9021684795 319.7565712929 6054895134.6560049057 129.0000080321 129.0000080321 129.0000080321
|
||||
84 244.2696671559 2820.5125763350 2146689.4107101629 2638.8720941742 319.6585431986 6054768952.2869329453 129.0000082269 129.0000082269 129.0000082269
|
||||
85 244.5161919319 2820.6629431352 2146689.4205491822 2638.8415194387 319.9811528443 6055091770.8571672440 129.0000084240 129.0000084240 129.0000084240
|
||||
86 244.5641090282 2820.6838080201 2146689.4305047127 2638.8103612394 320.0438585800 6055136589.3662166595 129.0000086234 129.0000086234 129.0000086234
|
||||
87 244.5348240638 2820.6541129118 2146689.4405767513 2638.7789728309 320.0055354056 6055072871.6007261276 129.0000088251 129.0000088251 129.0000088251
|
||||
88 244.6939431427 2820.7468233396 2146689.4507653015 2638.7470269267 320.2137633592 6055271920.8364210129 129.0000090292 129.0000090292 129.0000090292
|
||||
89 244.8800201091 2820.8567117003 2146689.4610703662 2638.7147520097 320.4572692055 6055507846.0901927948 129.0000092356 129.0000092356 129.0000092356
|
||||
90 244.8804280382 2820.8451141876 2146689.4714919478 2638.6820441173 320.4578030336 6055482979.2295818329 129.0000094444 129.0000094444 129.0000094444
|
||||
91 244.9558851986 2820.8815975090 2146689.4820300462 2638.6491836104 320.5565485155 6055561327.3181543350 129.0000096555 129.0000096555 129.0000096555
|
||||
92 244.9965893140 2820.8949614294 2146689.4926846647 2638.6159817170 320.6098151301 6055590045.5610351562 129.0000098689 129.0000098689 129.0000098689
|
||||
93 245.1381056687 2820.9732811388 2146689.5034558061 2638.5824451870 320.7950076360 6055758204.0434722900 129.0000100846 129.0000100846 129.0000100846
|
||||
94 245.2954807041 2821.0619342131 2146689.5143434699 2638.5485198222 321.0009532826 6055948545.3822879791 129.0000103027 129.0000103027 129.0000103027
|
||||
95 245.3535822199 2821.0860553731 2146689.5253476589 2638.5144817512 321.0769866522 6056000357.0671482086 129.0000105232 129.0000105232 129.0000105232
|
||||
96 245.5013476026 2821.1682908185 2146689.5364683764 2638.4801107361 321.2703568219 6056176922.4099712372 129.0000107459 129.0000107459 129.0000107459
|
||||
97 245.4166531417 2821.0989038023 2146689.5477056229 2638.4453663061 321.1595231342 6056028001.7295455933 129.0000109710 129.0000109710 129.0000109710
|
||||
98 245.4121937790 2821.0817490953 2146689.5590593945 2638.4097762390 321.1536874797 6055991207.9293851852 129.0000111984 129.0000111984 129.0000111984
|
||||
99 245.4532592994 2821.0946353191 2146689.5705296928 2638.3738037546 321.2074270397 6056018903.0102539062 129.0000114282 129.0000114282 129.0000114282
|
||||
100 245.7500657390 2821.2735939427 2146689.5821165247 2638.3375549051 321.5958367642 6056403104.3106222153 129.0000116603 129.0000116603 129.0000116603
|
||||
Loop time of 5.22601 on 1 procs for 100 steps with 10125 atoms
|
||||
|
||||
Performance: 2.133 ns/day, 11.250 hours/ns, 24.691 timesteps/s
|
||||
99.8% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 1.653 ns/day, 14.517 hours/ns, 19.135 timesteps/s
|
||||
99.7% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.46587 | 0.46587 | 0.46587 | 0.0 | 11.50
|
||||
Neigh | 1.4713 | 1.4713 | 1.4713 | 0.0 | 36.33
|
||||
Comm | 0.05567 | 0.05567 | 0.05567 | 0.0 | 1.37
|
||||
Output | 0.011364 | 0.011364 | 0.011364 | 0.0 | 0.28
|
||||
Modify | 2.0158 | 2.0158 | 2.0158 | 0.0 | 49.77
|
||||
Other | | 0.03004 | | | 0.74
|
||||
Pair | 0.44045 | 0.44045 | 0.44045 | 0.0 | 8.43
|
||||
Neigh | 2.669 | 2.669 | 2.669 | 0.0 | 51.07
|
||||
Comm | 0.056143 | 0.056143 | 0.056143 | 0.0 | 1.07
|
||||
Output | 0.012469 | 0.012469 | 0.012469 | 0.0 | 0.24
|
||||
Modify | 2.0163 | 2.0163 | 2.0163 | 0.0 | 38.58
|
||||
Other | | 0.03168 | | | 0.61
|
||||
|
||||
Nlocal: 10125 ave 10125 max 10125 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
@ -172,4 +180,4 @@ Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:04
|
||||
Total wall time: 0:00:05
|
||||
|
||||
@ -1,163 +1,163 @@
|
||||
############################################################################
|
||||
# Input file for investigating twinning nucleation under uniaxial loading with basal plane vector analysis
|
||||
# Christopher Barrett, March 2013
|
||||
# This script requires a Mg pair potential file to be in the same directory.
|
||||
|
||||
# fname is the file name. It is necessary for loops to work correctly. (See jump command)
|
||||
variable fname index in.basal
|
||||
|
||||
######################################
|
||||
# POTENTIAL VARIABLES
|
||||
# lattice parameters and the minimum energy per atom which should be obtained with the current pair potential and homogeneous lattice
|
||||
variable lx equal 3.181269601
|
||||
variable b equal sqrt(3)
|
||||
variable c equal sqrt(8/3)
|
||||
variable ly equal ${b}*${lx}
|
||||
variable lz equal ${c}*${lx}
|
||||
variable pairlocation index almg.liu
|
||||
variable pairstyle index eam/alloy/opt
|
||||
|
||||
######################################
|
||||
# EQUILIBRATION/DEFORMATION VARIABLES
|
||||
# eqpress = 10 bar = 1 MPa
|
||||
# tstep (the timestep) is set to a default value of 0.001 (1 fs)
|
||||
# seed randomizes the velocity
|
||||
# srate is the rate of strain in 1/s
|
||||
# Ndump is the number of timesteps in between each dump of the atom coordinates
|
||||
variable tstep equal 0.001
|
||||
variable seed equal 95812384
|
||||
variable srate equal 1e9
|
||||
|
||||
######################################
|
||||
# INITIALIZATION
|
||||
units metal
|
||||
dimension 3
|
||||
boundary s s s
|
||||
atom_style atomic
|
||||
|
||||
######################################
|
||||
# ATOM BUILD
|
||||
atom_modify map array
|
||||
|
||||
# lattice custom scale a1 "coordinates of a1" a2 "coordinates of a2" a3 "coordinates of a3" basis "atom1 coordinates" basis "atom2 coordinates" basis "atom3 coordinates" basis "atom4 coordinates" orient x "crystallographic orientation of x axis" orient y "crystallographic orientation of y axis" orient z "crystallographic orientation of z axis"
|
||||
lattice custom 3.181269601 a1 1 0 0 a2 0 1.732050808 0 a3 0 0 1.632993162 basis 0.0 0.0 0.0 basis 0.5 0.5 0 basis 0 0.3333333 0.5 basis 0.5 0.833333 0.5 orient x 0 1 1 orient y 1 0 0 orient z 0 1 -1
|
||||
variable multiple equal 20
|
||||
variable mx equal "v_lx*v_multiple"
|
||||
variable my equal "v_ly*v_multiple"
|
||||
variable mz equal "v_lz*v_multiple"
|
||||
|
||||
# the simulation region should be from 0 to a multiple of the periodic boundary in x, y and z.
|
||||
region whole block 0 ${mz} 0 ${mx} 0 ${my} units box
|
||||
create_box 2 whole
|
||||
create_atoms 1 box basis 1 1 basis 2 1 basis 3 1 basis 4 1
|
||||
|
||||
region fixed1 block INF INF INF INF INF 10 units box
|
||||
region fixed2 block INF INF INF INF 100 INF units box
|
||||
group lower region fixed1
|
||||
group upper region fixed2
|
||||
group boundary union upper lower
|
||||
group mobile subtract all boundary
|
||||
|
||||
variable natoms equal "count(all)"
|
||||
print "# of atoms are: ${natoms}"
|
||||
|
||||
######################################
|
||||
# INTERATOMIC POTENTIAL
|
||||
pair_style ${pairstyle}
|
||||
pair_coeff * * ${pairlocation} Mg Mg
|
||||
|
||||
######################################
|
||||
# COMPUTES REQUIRED
|
||||
compute csym all centro/atom 12
|
||||
compute eng all pe/atom
|
||||
compute eatoms all reduce sum c_eng
|
||||
compute basal all basal/atom
|
||||
|
||||
######################################
|
||||
# MINIMIZATION
|
||||
# Primarily adjusts the c/a ratio to value predicted by EAM potential
|
||||
reset_timestep 0
|
||||
thermo 1
|
||||
thermo_style custom step pe c_eatoms
|
||||
min_style cg
|
||||
minimize 1e-15 1e-15 1000 2000
|
||||
variable eminimum equal "c_eatoms / count(all)"
|
||||
print "%%e(it,1)=${eminimum}"
|
||||
|
||||
######################################
|
||||
# EQUILIBRATION
|
||||
reset_timestep 0
|
||||
timestep ${tstep}
|
||||
# atoms are given a random velocity based on a temperature of 100K.
|
||||
velocity all create 100 ${seed} mom yes rot no
|
||||
|
||||
# temperature and pressure are set to 100 and 0
|
||||
fix 1 all nve
|
||||
|
||||
# Set thermo output
|
||||
thermo 100
|
||||
thermo_style custom step lx ly lz press pxx pyy pzz pe temp
|
||||
|
||||
# Run for at least 2 picoseconds (assuming 1 fs timestep)
|
||||
run 2000
|
||||
|
||||
# Loop to run until pressure is below the variable eqpress (defined at beginning of file)
|
||||
label loopeq
|
||||
variable eq loop 100
|
||||
run 250
|
||||
variable converge equal press
|
||||
if "${converge} <= 0" then "variable converge equal -press" else "variable converge equal press"
|
||||
if "${converge} <= 50" then "jump ${fname} breakeq"
|
||||
next eq
|
||||
jump ${fname} loopeq
|
||||
label breakeq
|
||||
|
||||
# Store length for strain rate calculations
|
||||
variable tmp equal "lx"
|
||||
variable L0 equal ${tmp}
|
||||
print "Initial Length, L0: ${L0}"
|
||||
unfix 1
|
||||
|
||||
######################################
|
||||
# DEFORMATION
|
||||
reset_timestep 0
|
||||
timestep ${tstep}
|
||||
|
||||
# Impose constant strain rate
|
||||
variable srate1 equal "v_srate / 1.0e10"
|
||||
velocity upper set 0.0 NULL 0.0 units box
|
||||
velocity lower set 0.0 NULL 0.0 units box
|
||||
|
||||
fix 2 upper setforce 0.0 NULL 0.0
|
||||
fix 3 lower setforce 0.0 NULL 0.0
|
||||
fix 1 all nve
|
||||
|
||||
# Output strain and stress info to file
|
||||
# for units metal, pressure is in [bars] = 100 [kPa] = 1/10000 [GPa]
|
||||
# p2 is in GPa
|
||||
variable strain equal "(lx - v_L0)/v_L0"
|
||||
variable p1 equal "v_strain"
|
||||
variable p2 equal "-pxz/10000"
|
||||
variable p3 equal "lx"
|
||||
variable p4 equal "temp"
|
||||
variable p5 equal "pe"
|
||||
variable p6 equal "ke"
|
||||
fix def1 all print 100 "${p1} ${p2} ${p3} ${p4} ${p5} ${p6}" file output.def1.txt screen no
|
||||
# Dump coordinates to file (for void size calculations)
|
||||
dump 1 all custom 1000 output.dump.* id x y z c_basal[1] c_basal[2] c_basal[3]
|
||||
|
||||
# Display thermo
|
||||
thermo_style custom step v_strain pxz lx temp pe ke
|
||||
restart 50000 output.restart
|
||||
|
||||
# run deformation for 100000 timesteps (10% strain assuming 1 fs timestep and 1e9/s strainrate)
|
||||
variable runtime equal 0
|
||||
label loop
|
||||
displace_atoms all ramp x 0.0 ${srate1} z 10 100 units box
|
||||
run 100
|
||||
variable runtime equal ${runtime}+100
|
||||
if "${runtime} < 100000" then "jump ${fname} loop"
|
||||
|
||||
######################################
|
||||
# SIMULATION DONE
|
||||
print "All done"
|
||||
############################################################################
|
||||
# Input file for investigating twinning nucleation under uniaxial loading with basal plane vector analysis
|
||||
# Christopher Barrett, March 2013
|
||||
# This script requires a Mg pair potential file to be in the same directory.
|
||||
|
||||
# fname is the file name. It is necessary for loops to work correctly. (See jump command)
|
||||
variable fname index in.basal
|
||||
|
||||
######################################
|
||||
# POTENTIAL VARIABLES
|
||||
# lattice parameters and the minimum energy per atom which should be obtained with the current pair potential and homogeneous lattice
|
||||
variable lx equal 3.181269601
|
||||
variable b equal sqrt(3)
|
||||
variable c equal sqrt(8/3)
|
||||
variable ly equal ${b}*${lx}
|
||||
variable lz equal ${c}*${lx}
|
||||
variable pairlocation index almg.liu
|
||||
variable pairstyle index eam/alloy/opt
|
||||
|
||||
######################################
|
||||
# EQUILIBRATION/DEFORMATION VARIABLES
|
||||
# eqpress = 10 bar = 1 MPa
|
||||
# tstep (the timestep) is set to a default value of 0.001 (1 fs)
|
||||
# seed randomizes the velocity
|
||||
# srate is the rate of strain in 1/s
|
||||
# Ndump is the number of timesteps in between each dump of the atom coordinates
|
||||
variable tstep equal 0.001
|
||||
variable seed equal 95812384
|
||||
variable srate equal 1e9
|
||||
|
||||
######################################
|
||||
# INITIALIZATION
|
||||
units metal
|
||||
dimension 3
|
||||
boundary s s s
|
||||
atom_style atomic
|
||||
|
||||
######################################
|
||||
# ATOM BUILD
|
||||
atom_modify map array
|
||||
|
||||
# lattice custom scale a1 "coordinates of a1" a2 "coordinates of a2" a3 "coordinates of a3" basis "atom1 coordinates" basis "atom2 coordinates" basis "atom3 coordinates" basis "atom4 coordinates" orient x "crystallographic orientation of x axis" orient y "crystallographic orientation of y axis" orient z "crystallographic orientation of z axis"
|
||||
lattice custom 3.181269601 a1 1 0 0 a2 0 1.732050808 0 a3 0 0 1.632993162 basis 0.0 0.0 0.0 basis 0.5 0.5 0 basis 0 0.3333333 0.5 basis 0.5 0.833333 0.5 orient x 0 1 1 orient y 1 0 0 orient z 0 1 -1
|
||||
variable multiple equal 20
|
||||
variable mx equal "v_lx*v_multiple"
|
||||
variable my equal "v_ly*v_multiple"
|
||||
variable mz equal "v_lz*v_multiple"
|
||||
|
||||
# the simulation region should be from 0 to a multiple of the periodic boundary in x, y and z.
|
||||
region whole block 0 ${mz} 0 ${mx} 0 ${my} units box
|
||||
create_box 2 whole
|
||||
create_atoms 1 box basis 1 1 basis 2 1 basis 3 1 basis 4 1
|
||||
|
||||
region fixed1 block INF INF INF INF INF 10 units box
|
||||
region fixed2 block INF INF INF INF 100 INF units box
|
||||
group lower region fixed1
|
||||
group upper region fixed2
|
||||
group boundary union upper lower
|
||||
group mobile subtract all boundary
|
||||
|
||||
variable natoms equal "count(all)"
|
||||
print "# of atoms are: ${natoms}"
|
||||
|
||||
######################################
|
||||
# INTERATOMIC POTENTIAL
|
||||
pair_style ${pairstyle}
|
||||
pair_coeff * * ${pairlocation} Mg Mg
|
||||
|
||||
######################################
|
||||
# COMPUTES REQUIRED
|
||||
compute csym all centro/atom 12
|
||||
compute eng all pe/atom
|
||||
compute eatoms all reduce sum c_eng
|
||||
compute basal all basal/atom
|
||||
|
||||
######################################
|
||||
# MINIMIZATION
|
||||
# Primarily adjusts the c/a ratio to value predicted by EAM potential
|
||||
reset_timestep 0
|
||||
thermo 1
|
||||
thermo_style custom step pe c_eatoms
|
||||
min_style cg
|
||||
minimize 1e-15 1e-15 1000 2000
|
||||
variable eminimum equal "c_eatoms / count(all)"
|
||||
print "%%e(it,1)=${eminimum}"
|
||||
|
||||
######################################
|
||||
# EQUILIBRATION
|
||||
reset_timestep 0
|
||||
timestep ${tstep}
|
||||
# atoms are given a random velocity based on a temperature of 100K.
|
||||
velocity all create 100 ${seed} mom yes rot no
|
||||
|
||||
# temperature and pressure are set to 100 and 0
|
||||
fix 1 all nve
|
||||
|
||||
# Set thermo output
|
||||
thermo 100
|
||||
thermo_style custom step lx ly lz press pxx pyy pzz pe temp
|
||||
|
||||
# Run for at least 2 picoseconds (assuming 1 fs timestep)
|
||||
run 2000
|
||||
|
||||
# Loop to run until pressure is below the variable eqpress (defined at beginning of file)
|
||||
label loopeq
|
||||
variable eq loop 100
|
||||
run 250
|
||||
variable converge equal press
|
||||
if "${converge} <= 0" then "variable converge equal -press" else "variable converge equal press"
|
||||
if "${converge} <= 50" then "jump ${fname} breakeq"
|
||||
next eq
|
||||
jump ${fname} loopeq
|
||||
label breakeq
|
||||
|
||||
# Store length for strain rate calculations
|
||||
variable tmp equal "lx"
|
||||
variable L0 equal ${tmp}
|
||||
print "Initial Length, L0: ${L0}"
|
||||
unfix 1
|
||||
|
||||
######################################
|
||||
# DEFORMATION
|
||||
reset_timestep 0
|
||||
timestep ${tstep}
|
||||
|
||||
# Impose constant strain rate
|
||||
variable srate1 equal "v_srate / 1.0e10"
|
||||
velocity upper set 0.0 NULL 0.0 units box
|
||||
velocity lower set 0.0 NULL 0.0 units box
|
||||
|
||||
fix 2 upper setforce 0.0 NULL 0.0
|
||||
fix 3 lower setforce 0.0 NULL 0.0
|
||||
fix 1 all nve
|
||||
|
||||
# Output strain and stress info to file
|
||||
# for units metal, pressure is in [bars] = 100 [kPa] = 1/10000 [GPa]
|
||||
# p2 is in GPa
|
||||
variable strain equal "(lx - v_L0)/v_L0"
|
||||
variable p1 equal "v_strain"
|
||||
variable p2 equal "-pxz/10000"
|
||||
variable p3 equal "lx"
|
||||
variable p4 equal "temp"
|
||||
variable p5 equal "pe"
|
||||
variable p6 equal "ke"
|
||||
fix def1 all print 100 "${p1} ${p2} ${p3} ${p4} ${p5} ${p6}" file output.def1.txt screen no
|
||||
# Dump coordinates to file (for void size calculations)
|
||||
dump 1 all custom 1000 output.dump.* id x y z c_basal[1] c_basal[2] c_basal[3]
|
||||
|
||||
# Display thermo
|
||||
thermo_style custom step v_strain pxz lx temp pe ke
|
||||
restart 50000 output.restart
|
||||
|
||||
# run deformation for 100000 timesteps (10% strain assuming 1 fs timestep and 1e9/s strainrate)
|
||||
variable runtime equal 0
|
||||
label loop
|
||||
displace_atoms all ramp x 0.0 ${srate1} z 10 100 units box
|
||||
run 100
|
||||
variable runtime equal ${runtime}+100
|
||||
if "${runtime} < 100000" then "jump ${fname} loop"
|
||||
|
||||
######################################
|
||||
# SIMULATION DONE
|
||||
print "All done"
|
||||
|
||||
@ -15,6 +15,7 @@ bond_style harmonic
|
||||
bond_coeff * 225.0 0.85
|
||||
|
||||
comm_modify vel yes
|
||||
comm_modify cutoff 3.6
|
||||
|
||||
# must use pair hybrid, since srp bond particles
|
||||
# do not interact with other atom types
|
||||
|
||||
10
examples/mscg/README
Normal file
10
examples/mscg/README
Normal file
@ -0,0 +1,10 @@
|
||||
Running this example requires that LAMMPS be built with the MSCG
|
||||
package and its fix mscg command. The fix uses the Multi-Scale
|
||||
Coarse-Graining (MS-CG) library, freely available at
|
||||
https://github.com/uchicago-voth/MSCG-release, to compute optimized
|
||||
coarse-grained force field parameters. The MS-CG library was
|
||||
developed by Jacob Wagner in Greg Voth's group at the University of
|
||||
Chicago.
|
||||
|
||||
See the lib/mscg/README file for instructions on how to download and
|
||||
install the MS-CG library for use with LAMMPS.
|
||||
12
examples/mscg/control.in
Normal file
12
examples/mscg/control.in
Normal file
@ -0,0 +1,12 @@
|
||||
block_size 1
|
||||
start_frame 1
|
||||
n_frames 19
|
||||
nonbonded_cutoff 10.0
|
||||
basis_type 0
|
||||
primary_output_style 0
|
||||
output_solution_flag 1
|
||||
output_spline_coeffs_flag 1
|
||||
pair_nonbonded_bspline_basis_order 6
|
||||
pair_nonbonded_basis_set_resolution 0.7
|
||||
pair_nonbonded_output_binwidth 0.1
|
||||
matrix_type 0
|
||||
1015
examples/mscg/data.meoh
Normal file
1015
examples/mscg/data.meoh
Normal file
File diff suppressed because it is too large
Load Diff
20180
examples/mscg/dump.meoh
Normal file
20180
examples/mscg/dump.meoh
Normal file
File diff suppressed because it is too large
Load Diff
22
examples/mscg/in.mscg
Normal file
22
examples/mscg/in.mscg
Normal file
@ -0,0 +1,22 @@
|
||||
units real
|
||||
atom_style full
|
||||
pair_style zero 10.0
|
||||
|
||||
read_data data.meoh
|
||||
pair_coeff * *
|
||||
|
||||
thermo 1
|
||||
thermo_style custom step
|
||||
|
||||
# Test 1a: range finder functionality
|
||||
fix 1 all mscg 1 range on
|
||||
rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz
|
||||
print "TEST_1a mscg range finder"
|
||||
unfix 1
|
||||
|
||||
# Test 1b: force matching functionality
|
||||
fix 1 all mscg 1
|
||||
rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz
|
||||
print "TEST_1b mscg force matching"
|
||||
|
||||
print TEST_DONE
|
||||
77
examples/mscg/output_9Jan17/1_1.dat
Normal file
77
examples/mscg/output_9Jan17/1_1.dat
Normal file
@ -0,0 +1,77 @@
|
||||
2.500000 5.670970817963099e+02
|
||||
2.600000 2.404059283529051e+02
|
||||
2.700000 9.157060823529977e+01
|
||||
2.800000 3.428273061369140e+01
|
||||
2.900000 1.619868149395266e+01
|
||||
3.000000 1.039607214301755e+01
|
||||
3.100000 6.830187514267188e+00
|
||||
3.200000 3.861970842349535e+00
|
||||
3.300000 1.645948643278161e+00
|
||||
3.400000 2.395428971623918e-01
|
||||
3.500000 -4.276763637833773e-01
|
||||
3.600000 -5.132022977965877e-01
|
||||
3.700000 -2.208024961234051e-01
|
||||
3.800000 2.402697744243800e-01
|
||||
3.900000 6.956064296165573e-01
|
||||
4.000000 1.034070044257954e+00
|
||||
4.100000 1.205997975111669e+00
|
||||
4.200000 1.209501102128581e+00
|
||||
4.300000 1.076304670380924e+00
|
||||
4.400000 8.575891319958883e-01
|
||||
4.500000 6.098309880892070e-01
|
||||
4.600000 3.807992942746473e-01
|
||||
4.700000 1.995994191469442e-01
|
||||
4.800000 7.699059877424269e-02
|
||||
4.900000 9.750744163981299e-03
|
||||
5.000000 -1.480308769532222e-02
|
||||
5.100000 -1.429422279228416e-02
|
||||
5.200000 -6.765899050869768e-03
|
||||
5.300000 -6.214398421078919e-03
|
||||
5.400000 -1.951586041390797e-02
|
||||
5.500000 -4.689090237947263e-02
|
||||
5.600000 -8.376292122940529e-02
|
||||
5.700000 -1.226699982917263e-01
|
||||
5.800000 -1.551768041657136e-01
|
||||
5.900000 -1.737865035767736e-01
|
||||
6.000000 -1.738272491408507e-01
|
||||
6.100000 -1.546779867768825e-01
|
||||
6.200000 -1.193171291488982e-01
|
||||
6.300000 -7.321054075616322e-02
|
||||
6.400000 -2.317411193286228e-02
|
||||
6.500000 2.376366715221714e-02
|
||||
6.600000 6.149913249600215e-02
|
||||
6.700000 8.597538938112201e-02
|
||||
6.800000 9.590170060736655e-02
|
||||
6.900000 9.245100462148878e-02
|
||||
7.000000 7.855487875847664e-02
|
||||
7.100000 5.818301960249692e-02
|
||||
7.200000 3.562272334783877e-02
|
||||
7.300000 1.475836615985744e-02
|
||||
7.400000 -1.639617536128255e-03
|
||||
7.500000 -1.237881063914745e-02
|
||||
7.600000 -1.768202571195587e-02
|
||||
7.700000 -1.877757119362295e-02
|
||||
7.800000 -1.748001968416543e-02
|
||||
7.900000 -1.577097622918088e-02
|
||||
8.000000 -1.537984660448136e-02
|
||||
8.100000 -1.737044400054951e-02
|
||||
8.200000 -2.187939410237979e-02
|
||||
8.300000 -2.823987455760605e-02
|
||||
8.400000 -3.525715284001425e-02
|
||||
8.500000 -4.148996251287761e-02
|
||||
8.600000 -4.553187949229211e-02
|
||||
8.700000 -4.629269831051163e-02
|
||||
8.800000 -4.327548798226762e-02
|
||||
8.900000 -3.674131754868225e-02
|
||||
9.000000 -2.758883541814894e-02
|
||||
9.100000 -1.712151838480657e-02
|
||||
9.200000 -6.810600249997737e-03
|
||||
9.300000 1.941999556272785e-03
|
||||
9.400000 8.040747353879739e-03
|
||||
9.500000 1.092691524686838e-02
|
||||
9.600000 1.063606620723048e-02
|
||||
9.700000 7.416550438142138e-03
|
||||
9.800000 1.175066786686231e-03
|
||||
9.900000 -9.084427187675534e-03
|
||||
10.000000 -2.582180514463068e-02
|
||||
10.100000 -5.352186189454393e-02
|
||||
82
examples/mscg/output_9Jan17/1_1.table
Normal file
82
examples/mscg/output_9Jan17/1_1.table
Normal file
@ -0,0 +1,82 @@
|
||||
# Header information on force file
|
||||
|
||||
1_1
|
||||
N 77 R 2.500000 10.100000
|
||||
|
||||
1 2.500000 69.428523 567.097082
|
||||
2 2.600000 29.053372 240.405928
|
||||
3 2.700000 12.454545 91.570608
|
||||
4 2.800000 6.161878 34.282731
|
||||
5 2.900000 3.637808 16.198681
|
||||
6 3.000000 2.308070 10.396072
|
||||
7 3.100000 1.446757 6.830188
|
||||
8 3.200000 0.912149 3.861971
|
||||
9 3.300000 0.636753 1.645949
|
||||
10 3.400000 0.542478 0.239543
|
||||
11 3.500000 0.551885 -0.427676
|
||||
12 3.600000 0.598929 -0.513202
|
||||
13 3.700000 0.635629 -0.220802
|
||||
14 3.800000 0.634656 0.240270
|
||||
15 3.900000 0.587862 0.695606
|
||||
16 4.000000 0.501378 1.034070
|
||||
17 4.100000 0.389375 1.205998
|
||||
18 4.200000 0.268600 1.209501
|
||||
19 4.300000 0.154310 1.076305
|
||||
20 4.400000 0.057615 0.857589
|
||||
21 4.500000 -0.015756 0.609831
|
||||
22 4.600000 -0.065288 0.380799
|
||||
23 4.700000 -0.094307 0.199599
|
||||
24 4.800000 -0.108137 0.076991
|
||||
25 4.900000 -0.112474 0.009751
|
||||
26 5.000000 -0.112221 -0.014803
|
||||
27 5.100000 -0.110767 -0.014294
|
||||
28 5.200000 -0.109714 -0.006766
|
||||
29 5.300000 -0.109065 -0.006214
|
||||
30 5.400000 -0.107778 -0.019516
|
||||
31 5.500000 -0.104458 -0.046891
|
||||
32 5.600000 -0.097925 -0.083763
|
||||
33 5.700000 -0.087603 -0.122670
|
||||
34 5.800000 -0.073711 -0.155177
|
||||
35 5.900000 -0.057263 -0.173787
|
||||
36 6.000000 -0.039882 -0.173827
|
||||
37 6.100000 -0.023457 -0.154678
|
||||
38 6.200000 -0.009757 -0.119317
|
||||
39 6.300000 -0.000131 -0.073211
|
||||
40 6.400000 0.004688 -0.023174
|
||||
41 6.500000 0.004659 0.023764
|
||||
42 6.600000 0.000396 0.061499
|
||||
43 6.700000 -0.006978 0.085975
|
||||
44 6.800000 -0.016072 0.095902
|
||||
45 6.900000 -0.025489 0.092451
|
||||
46 7.000000 -0.034040 0.078555
|
||||
47 7.100000 -0.040877 0.058183
|
||||
48 7.200000 -0.045567 0.035623
|
||||
49 7.300000 -0.048086 0.014758
|
||||
50 7.400000 -0.048742 -0.001640
|
||||
51 7.500000 -0.048041 -0.012379
|
||||
52 7.600000 -0.046538 -0.017682
|
||||
53 7.700000 -0.044715 -0.018778
|
||||
54 7.800000 -0.042902 -0.017480
|
||||
55 7.900000 -0.041239 -0.015771
|
||||
56 8.000000 -0.039682 -0.015380
|
||||
57 8.100000 -0.038044 -0.017370
|
||||
58 8.200000 -0.036082 -0.021879
|
||||
59 8.300000 -0.033576 -0.028240
|
||||
60 8.400000 -0.030401 -0.035257
|
||||
61 8.500000 -0.026564 -0.041490
|
||||
62 8.600000 -0.022213 -0.045532
|
||||
63 8.700000 -0.017621 -0.046293
|
||||
64 8.800000 -0.013143 -0.043275
|
||||
65 8.900000 -0.009142 -0.036741
|
||||
66 9.000000 -0.005926 -0.027589
|
||||
67 9.100000 -0.003690 -0.017122
|
||||
68 9.200000 -0.002494 -0.006811
|
||||
69 9.300000 -0.002250 0.001942
|
||||
70 9.400000 -0.002749 0.008041
|
||||
71 9.500000 -0.003698 0.010927
|
||||
72 9.600000 -0.004776 0.010636
|
||||
73 9.700000 -0.005678 0.007417
|
||||
74 9.800000 -0.006108 0.001175
|
||||
75 9.900000 -0.005712 -0.009084
|
||||
76 10.000000 -0.003967 -0.025822
|
||||
77 10.100000 0.000000 -0.053522
|
||||
2
examples/mscg/output_9Jan17/b-spline.out
Normal file
2
examples/mscg/output_9Jan17/b-spline.out
Normal file
@ -0,0 +1,2 @@
|
||||
n: 1 1 6 12 2.400000000000002e+00 1.010000000000000e+01
|
||||
1.200460787805587e+03 2.169623423326193e+01 2.388396964379328e+01 -1.197754948555067e+01 6.472482422420378e+00 -1.483711824891365e+00 7.768139601662113e-01 -7.869494711740244e-01 4.830820182054661e-01 -1.892989444995645e-01 1.021275453070386e-01 -1.637649039972671e-01 5.570978712841167e-02 7.637188693695119e-03 -4.109175461195019e-03 -5.352186189455146e-02
|
||||
1
examples/mscg/output_9Jan17/rmin.in
Normal file
1
examples/mscg/output_9Jan17/rmin.in
Normal file
@ -0,0 +1 @@
|
||||
1 1 2.852369 10.000000 fm
|
||||
0
examples/mscg/output_9Jan17/rmin_b.in
Normal file
0
examples/mscg/output_9Jan17/rmin_b.in
Normal file
18
examples/mscg/output_9Jan17/sol_info.out
Normal file
18
examples/mscg/output_9Jan17/sol_info.out
Normal file
@ -0,0 +1,18 @@
|
||||
fm_matrix_rows:3000; fm_matrix_columns:16;
|
||||
Singular vector:
|
||||
2.442317e+00
|
||||
2.105009e+00
|
||||
1.433251e+00
|
||||
1.184602e+00
|
||||
9.739627e-01
|
||||
6.944898e-01
|
||||
5.376709e-01
|
||||
4.616070e-01
|
||||
3.257062e-01
|
||||
2.683729e-01
|
||||
1.530153e-01
|
||||
9.336288e-02
|
||||
5.042150e-02
|
||||
2.126912e-02
|
||||
1.446682e-02
|
||||
4.167763e-05
|
||||
1
examples/mscg/output_9Jan17/x.out
Normal file
1
examples/mscg/output_9Jan17/x.out
Normal file
@ -0,0 +1 @@
|
||||
<EFBFBD>-<2D><><EFBFBD><EFBFBD><EFBFBD>@47h<<3C>5@<40><><EFBFBD><EFBFBD>K<EFBFBD>7@<40>R<EFBFBD>]<5D><>'<27><><EFBFBD><EFBFBD>n<EFBFBD><6E>@I<DD8C>H<EFBFBD><48><EFBFBD><19>?<3F><><EFBFBD><EFBFBD>?r<>I<EFBFBD><49>.<2E><><11>^<5E><><EFBFBD><EFBFBD>?W<57><7F><EFBFBD>:ȿ(O<1D>%<25>?<3F>Ns<4E>?<3F>Ŀ<EFBFBD>:<3A>C<EFBFBD><43><EFBFBD>?<3F><><EFBFBD>:,H?<3F>}<7D>c<EFBFBD><63>p<EFBFBD><70><EFBFBD><EFBFBD><EFBFBD>7g<37><67>
|
||||
@ -78,7 +78,7 @@ run 100
|
||||
|
||||
# only output atoms near vacancy
|
||||
|
||||
compute coord all coord/atom $r
|
||||
compute coord all coord/atom cutoff $r
|
||||
|
||||
#dump events all custom 1 dump.prd id type x y z
|
||||
#dump_modify events thresh c_coord != 4
|
||||
|
||||
@ -80,7 +80,7 @@ velocity all zero linear
|
||||
|
||||
# only output atoms near vacancy
|
||||
|
||||
compute coord all coord/atom $r
|
||||
compute coord all coord/atom cutoff $r
|
||||
|
||||
#dump events all custom 1 dump.prd id type x y z
|
||||
#dump_modify events thresh c_coord != 4
|
||||
|
||||
10
examples/voronoi/README
Normal file
10
examples/voronoi/README
Normal file
@ -0,0 +1,10 @@
|
||||
Running this example requires that LAMMPS be built with the VORONOI
|
||||
package and its compute voronoi command. The compute uses the Voro++
|
||||
library, freely available at http://math.lbl.gov/voro++, to compute
|
||||
the Voronoi tessellation locally on each processor. Voro++ was
|
||||
developed by Chris H. Rycroft while at UC Berkeley / Lawrence Berkeley
|
||||
Laboratory.
|
||||
|
||||
See the lib/voronoi/README file for instructions on how to download
|
||||
and install the Voro++ library for use with LAMMPS.
|
||||
|
||||
@ -39,6 +39,8 @@ meam modified embedded atom method (MEAM) potential, MEAM package
|
||||
from Greg Wagner (Sandia)
|
||||
molfile hooks to VMD molfile plugins, used by the USER-MOLFILE package
|
||||
from Axel Kohlmeyer (Temple U) and the VMD development team
|
||||
mscg hooks to the MSCG library, used by fix_mscg command
|
||||
from Jacob Wagner and Greg Voth group (U Chicago)
|
||||
python hooks to the system Python library, used by the PYTHON package
|
||||
from the LAMMPS development team
|
||||
qmmm quantum mechanics/molecular mechanics coupling interface
|
||||
|
||||
8
lib/kokkos/.gitignore
vendored
8
lib/kokkos/.gitignore
vendored
@ -1,8 +0,0 @@
|
||||
# Standard ignores
|
||||
*~
|
||||
*.pyc
|
||||
\#*#
|
||||
.#*
|
||||
.*.swp
|
||||
.cproject
|
||||
.project
|
||||
284
lib/kokkos/CHANGELOG.md
Normal file
284
lib/kokkos/CHANGELOG.md
Normal file
@ -0,0 +1,284 @@
|
||||
# Change Log
|
||||
|
||||
## [2.02.07](https://github.com/kokkos/kokkos/tree/2.02.07) (2016-12-16)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.01...2.02.07)
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- Add CMake option to enable Cuda Lambda support [\#589](https://github.com/kokkos/kokkos/issues/589)
|
||||
- Add CMake option to enable Cuda RDC support [\#588](https://github.com/kokkos/kokkos/issues/588)
|
||||
- Add Initial Intel Sky Lake Xeon-HPC Compiler Support to Kokkos Make System [\#584](https://github.com/kokkos/kokkos/issues/584)
|
||||
- Building Tutorial Examples [\#582](https://github.com/kokkos/kokkos/issues/582)
|
||||
- Internal way for using ThreadVectorRange without TeamHandle [\#574](https://github.com/kokkos/kokkos/issues/574)
|
||||
- Testing: Add testing for uvm and rdc [\#571](https://github.com/kokkos/kokkos/issues/571)
|
||||
- Profiling: Add Memory Tracing and Region Markers [\#557](https://github.com/kokkos/kokkos/issues/557)
|
||||
- nvcc\_wrapper not installed with Kokkos built with CUDA through CMake [\#543](https://github.com/kokkos/kokkos/issues/543)
|
||||
- Improve DynRankView debug check [\#541](https://github.com/kokkos/kokkos/issues/541)
|
||||
- Benchmarks: Add Gather benchmark [\#536](https://github.com/kokkos/kokkos/issues/536)
|
||||
- Testing: add spot\_check option to test\_all\_sandia [\#535](https://github.com/kokkos/kokkos/issues/535)
|
||||
- Deprecate Kokkos::Impl::VerifyExecutionCanAccessMemorySpace [\#527](https://github.com/kokkos/kokkos/issues/527)
|
||||
- Add AtomicAdd support for 64bit float for Pascal [\#522](https://github.com/kokkos/kokkos/issues/522)
|
||||
- Add Restrict and Aligned memory trait [\#517](https://github.com/kokkos/kokkos/issues/517)
|
||||
- Kokkos Tests are Not Run using Compiler Optimization [\#501](https://github.com/kokkos/kokkos/issues/501)
|
||||
- Add support for clang 3.7 w/ openmp backend [\#393](https://github.com/kokkos/kokkos/issues/393)
|
||||
- Provide an error throw class [\#79](https://github.com/kokkos/kokkos/issues/79)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- Cuda UVM Allocation test broken with UVM as default space [\#586](https://github.com/kokkos/kokkos/issues/586)
|
||||
- Bug \(develop branch only\): multiple tests are now failing when forcing uvm usage. [\#570](https://github.com/kokkos/kokkos/issues/570)
|
||||
- Error in generate\_makefile.sh for Kokkos when Compiler is Empty String/Fails [\#568](https://github.com/kokkos/kokkos/issues/568)
|
||||
- XL 13.1.4 incorrect C++11 flag [\#553](https://github.com/kokkos/kokkos/issues/553)
|
||||
- Improve DynRankView debug check [\#541](https://github.com/kokkos/kokkos/issues/541)
|
||||
- Installing Library on MAC broken due to cp -u [\#539](https://github.com/kokkos/kokkos/issues/539)
|
||||
- Intel Nightly Testing with Debug enabled fails [\#534](https://github.com/kokkos/kokkos/issues/534)
|
||||
|
||||
## [2.02.01](https://github.com/kokkos/kokkos/tree/2.02.01) (2016-11-01)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.00...2.02.01)
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- Add Changelog generation to our process. [\#506](https://github.com/kokkos/kokkos/issues/506)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- Test scratch\_request fails in Serial with Debug enabled [\#520](https://github.com/kokkos/kokkos/issues/520)
|
||||
- Bug In BoundsCheck for DynRankView [\#516](https://github.com/kokkos/kokkos/issues/516)
|
||||
|
||||
## [2.02.00](https://github.com/kokkos/kokkos/tree/2.02.00) (2016-10-30)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.10...2.02.00)
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- Add PowerPC assembly for grabbing clock register in memory pool [\#511](https://github.com/kokkos/kokkos/issues/511)
|
||||
- Add GCC 6.x support [\#508](https://github.com/kokkos/kokkos/issues/508)
|
||||
- Test install and build against installed library [\#498](https://github.com/kokkos/kokkos/issues/498)
|
||||
- Makefile.kokkos adds expt-extended-lambda to cuda build with clang [\#490](https://github.com/kokkos/kokkos/issues/490)
|
||||
- Add top-level makefile option to just test kokkos-core unit-test [\#485](https://github.com/kokkos/kokkos/issues/485)
|
||||
- Split and harmonize Object Files of Core UnitTests to increase build parallelism [\#484](https://github.com/kokkos/kokkos/issues/484)
|
||||
- LayoutLeft to LayoutLeft subview for 3D and 4D views [\#473](https://github.com/kokkos/kokkos/issues/473)
|
||||
- Add official Cuda 8.0 support [\#468](https://github.com/kokkos/kokkos/issues/468)
|
||||
- Allow C++1Z Flag for Class Lambda capture [\#465](https://github.com/kokkos/kokkos/issues/465)
|
||||
- Add Clang 4.0+ compilation of Cuda code [\#455](https://github.com/kokkos/kokkos/issues/455)
|
||||
- Possible Issue with Intel 17.0.098 and GCC 6.1.0 in Develop Branch [\#445](https://github.com/kokkos/kokkos/issues/445)
|
||||
- Add name of view to "View bounds error" [\#432](https://github.com/kokkos/kokkos/issues/432)
|
||||
- Move Sort Binning Operators into Kokkos namespace [\#421](https://github.com/kokkos/kokkos/issues/421)
|
||||
- TaskPolicy - generate error when attempt to use uninitialized [\#396](https://github.com/kokkos/kokkos/issues/396)
|
||||
- Import WithoutInitializing and AllowPadding into Kokkos namespace [\#325](https://github.com/kokkos/kokkos/issues/325)
|
||||
- TeamThreadRange requires begin, end to be the same type [\#305](https://github.com/kokkos/kokkos/issues/305)
|
||||
- CudaUVMSpace should track \# allocations, due to CUDA limit on \# UVM allocations [\#300](https://github.com/kokkos/kokkos/issues/300)
|
||||
- Remove old View and its infrastructure [\#259](https://github.com/kokkos/kokkos/issues/259)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- Bug in TestCuda\_Other.cpp: most likely assembly inserted into Device code [\#515](https://github.com/kokkos/kokkos/issues/515)
|
||||
- Cuda Compute Capability check of GPU is outdated [\#509](https://github.com/kokkos/kokkos/issues/509)
|
||||
- multi\_scratch test with hwloc and pthreads seg-faults. [\#504](https://github.com/kokkos/kokkos/issues/504)
|
||||
- generate\_makefile.bash: "make install" is broken [\#503](https://github.com/kokkos/kokkos/issues/503)
|
||||
- make clean in Out of Source Build/Tests Does Not Work Correctly [\#502](https://github.com/kokkos/kokkos/issues/502)
|
||||
- Makefiles for test and examples have issues in Cuda when CXX is not explicitly specified [\#497](https://github.com/kokkos/kokkos/issues/497)
|
||||
- Dispatch lambda test directly inside GTEST macro doesn't work with nvcc [\#491](https://github.com/kokkos/kokkos/issues/491)
|
||||
- UnitTests with HWLOC enabled fail if run with mpirun bound to a single core [\#489](https://github.com/kokkos/kokkos/issues/489)
|
||||
- Failing Reducer Test on Mac with Pthreads [\#479](https://github.com/kokkos/kokkos/issues/479)
|
||||
- make test Dumps Error with Clang Not Found [\#471](https://github.com/kokkos/kokkos/issues/471)
|
||||
- OpenMP TeamPolicy member broadcast not using correct volatile shared variable [\#424](https://github.com/kokkos/kokkos/issues/424)
|
||||
- TaskPolicy - generate error when attempt to use uninitialized [\#396](https://github.com/kokkos/kokkos/issues/396)
|
||||
- New task policy implementation is pulling in old experimental code. [\#372](https://github.com/kokkos/kokkos/issues/372)
|
||||
- MemoryPool unit test hangs on Power8 with GCC 6.1.0 [\#298](https://github.com/kokkos/kokkos/issues/298)
|
||||
|
||||
## [2.01.10](https://github.com/kokkos/kokkos/tree/2.01.10) (2016-09-27)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.06...2.01.10)
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- Enable Profiling by default in Tribits build [\#438](https://github.com/kokkos/kokkos/issues/438)
|
||||
- parallel\_reduce\(0\), parallel\_scan\(0\) unit tests [\#436](https://github.com/kokkos/kokkos/issues/436)
|
||||
- data\(\)==NULL after realloc with LayoutStride [\#351](https://github.com/kokkos/kokkos/issues/351)
|
||||
- Fix tutorials to track new Kokkos::View [\#323](https://github.com/kokkos/kokkos/issues/323)
|
||||
- Rename team policy set\_scratch\_size. [\#195](https://github.com/kokkos/kokkos/issues/195)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- Possible Issue with Intel 17.0.098 and GCC 6.1.0 in Develop Branch [\#445](https://github.com/kokkos/kokkos/issues/445)
|
||||
- Makefile spits syntax error [\#435](https://github.com/kokkos/kokkos/issues/435)
|
||||
- Kokkos::sort fails for view with all the same values [\#422](https://github.com/kokkos/kokkos/issues/422)
|
||||
- Generic Reducers: can't accept inline constructed reducer [\#404](https://github.com/kokkos/kokkos/issues/404)
|
||||
- data\(\)==NULL after realloc with LayoutStride [\#351](https://github.com/kokkos/kokkos/issues/351)
|
||||
- const subview of const view with compile time dimensions on Cuda backend [\#310](https://github.com/kokkos/kokkos/issues/310)
|
||||
- Kokkos \(in Trilinos\) Causes Internal Compiler Error on CUDA 8.0.21-EA on POWER8 [\#307](https://github.com/kokkos/kokkos/issues/307)
|
||||
- Core Oversubscription Detection Broken? [\#159](https://github.com/kokkos/kokkos/issues/159)
|
||||
|
||||
|
||||
## [2.01.06](https://github.com/kokkos/kokkos/tree/2.01.06) (2016-09-02)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.00...2.01.06)
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- Add "standard" reducers for lambda-supportable customized reduce [\#411](https://github.com/kokkos/kokkos/issues/411)
|
||||
- TaskPolicy - single thread back-end execution [\#390](https://github.com/kokkos/kokkos/issues/390)
|
||||
- Kokkos master clone tag [\#387](https://github.com/kokkos/kokkos/issues/387)
|
||||
- Query memory requirements from task policy [\#378](https://github.com/kokkos/kokkos/issues/378)
|
||||
- Output order of test\_atomic.cpp is confusing [\#373](https://github.com/kokkos/kokkos/issues/373)
|
||||
- Missing testing for atomics [\#341](https://github.com/kokkos/kokkos/issues/341)
|
||||
- Feature request for Kokkos to provide Kokkos::atomic\_fetch\_max and atomic\_fetch\_min [\#336](https://github.com/kokkos/kokkos/issues/336)
|
||||
- TaskPolicy\<Cuda\> performance requires teams mapped to warps [\#218](https://github.com/kokkos/kokkos/issues/218)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- Reduce with Teams broken for custom initialize [\#407](https://github.com/kokkos/kokkos/issues/407)
|
||||
- Failing Kokkos build on Debian [\#402](https://github.com/kokkos/kokkos/issues/402)
|
||||
- Failing Tests on NVIDIA Pascal GPUs [\#398](https://github.com/kokkos/kokkos/issues/398)
|
||||
- Algorithms: fill\_random assumes dimensions fit in unsigned int [\#389](https://github.com/kokkos/kokkos/issues/389)
|
||||
- Kokkos::subview with RandomAccess Memory Trait [\#385](https://github.com/kokkos/kokkos/issues/385)
|
||||
- Build warning \(signed / unsigned comparison\) in Cuda implementation [\#365](https://github.com/kokkos/kokkos/issues/365)
|
||||
- wrong results for a parallel\_reduce with CUDA8 / Maxwell50 [\#352](https://github.com/kokkos/kokkos/issues/352)
|
||||
- Hierarchical parallelism - 3 level unit test [\#344](https://github.com/kokkos/kokkos/issues/344)
|
||||
- Can I allocate a View w/ both WithoutInitializing & AllowPadding? [\#324](https://github.com/kokkos/kokkos/issues/324)
|
||||
- subview View layout determination [\#309](https://github.com/kokkos/kokkos/issues/309)
|
||||
- Unit tests with Cuda - Maxwell [\#196](https://github.com/kokkos/kokkos/issues/196)
|
||||
|
||||
## [2.01.00](https://github.com/kokkos/kokkos/tree/2.01.00) (2016-07-21)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/End_C++98...2.01.00)
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- Edit ViewMapping so assigning Views with the same custom layout compiles when const casting [\#327](https://github.com/kokkos/kokkos/issues/327)
|
||||
- DynRankView: Performance improvement for operator\(\) [\#321](https://github.com/kokkos/kokkos/issues/321)
|
||||
- Interoperability between static and dynamic rank views [\#295](https://github.com/kokkos/kokkos/issues/295)
|
||||
- subview member function ? [\#280](https://github.com/kokkos/kokkos/issues/280)
|
||||
- Inter-operability between View and DynRankView. [\#245](https://github.com/kokkos/kokkos/issues/245)
|
||||
- \(Trilinos\) build warning in atomic\_assign, with Kokkos::complex [\#177](https://github.com/kokkos/kokkos/issues/177)
|
||||
- View\<\>::shmem\_size should runtime check for number of arguments equal to rank [\#176](https://github.com/kokkos/kokkos/issues/176)
|
||||
- Custom reduction join via lambda argument [\#99](https://github.com/kokkos/kokkos/issues/99)
|
||||
- DynRankView with 0 dimensions passed in at construction [\#293](https://github.com/kokkos/kokkos/issues/293)
|
||||
- Inject view\_alloc and friends into Kokkos namespace [\#292](https://github.com/kokkos/kokkos/issues/292)
|
||||
- Less restrictive TeamPolicy reduction on Cuda [\#286](https://github.com/kokkos/kokkos/issues/286)
|
||||
- deep\_copy using remap with source execution space [\#267](https://github.com/kokkos/kokkos/issues/267)
|
||||
- Suggestion: Enable opt-in L1 caching via nvcc-wrapper [\#261](https://github.com/kokkos/kokkos/issues/261)
|
||||
- More flexible create\_mirror functions [\#260](https://github.com/kokkos/kokkos/issues/260)
|
||||
- Rename View::memory\_span to View::required\_allocation\_size [\#256](https://github.com/kokkos/kokkos/issues/256)
|
||||
- Use of subviews and views with compile-time dimensions [\#237](https://github.com/kokkos/kokkos/issues/237)
|
||||
- Use of subviews and views with compile-time dimensions [\#237](https://github.com/kokkos/kokkos/issues/237)
|
||||
- Kokkos::Timer [\#234](https://github.com/kokkos/kokkos/issues/234)
|
||||
- Fence CudaUVMSpace allocations [\#230](https://github.com/kokkos/kokkos/issues/230)
|
||||
- View::operator\(\) accept std::is\_integral and std::is\_enum [\#227](https://github.com/kokkos/kokkos/issues/227)
|
||||
- Allocating zero size View [\#216](https://github.com/kokkos/kokkos/issues/216)
|
||||
- Thread scalable memory pool [\#212](https://github.com/kokkos/kokkos/issues/212)
|
||||
- Add a way to disable memory leak output [\#194](https://github.com/kokkos/kokkos/issues/194)
|
||||
- Kokkos exec space init should init Kokkos profiling [\#192](https://github.com/kokkos/kokkos/issues/192)
|
||||
- Runtime rank wrapper for View [\#189](https://github.com/kokkos/kokkos/issues/189)
|
||||
- Profiling Interface [\#158](https://github.com/kokkos/kokkos/issues/158)
|
||||
- Fix View assignment \(of managed to unmanaged\) [\#153](https://github.com/kokkos/kokkos/issues/153)
|
||||
- Add unit test for assignment of managed View to unmanaged View [\#152](https://github.com/kokkos/kokkos/issues/152)
|
||||
- Check for oversubscription of threads with MPI in Kokkos::initialize [\#149](https://github.com/kokkos/kokkos/issues/149)
|
||||
- Dynamic resizeable 1dimensional view [\#143](https://github.com/kokkos/kokkos/issues/143)
|
||||
- Develop TaskPolicy for CUDA [\#142](https://github.com/kokkos/kokkos/issues/142)
|
||||
- New View : Test Compilation Downstream [\#138](https://github.com/kokkos/kokkos/issues/138)
|
||||
- New View Implementation [\#135](https://github.com/kokkos/kokkos/issues/135)
|
||||
- Add variant of subview that lets users add traits [\#134](https://github.com/kokkos/kokkos/issues/134)
|
||||
- NVCC-WRAPPER: Add --host-only flag [\#121](https://github.com/kokkos/kokkos/issues/121)
|
||||
- Address gtest issue with TriBITS Kokkos build outside of Trilinos [\#117](https://github.com/kokkos/kokkos/issues/117)
|
||||
- Make tests pass with -expt-extended-lambda on CUDA [\#108](https://github.com/kokkos/kokkos/issues/108)
|
||||
- Dynamic scheduling for parallel\_for and parallel\_reduce [\#106](https://github.com/kokkos/kokkos/issues/106)
|
||||
- Runtime or compile time error when reduce functor's join is not properly specified as const member function or with volatile arguments [\#105](https://github.com/kokkos/kokkos/issues/105)
|
||||
- Error out when the number of threads is modified after kokkos is initialized [\#104](https://github.com/kokkos/kokkos/issues/104)
|
||||
- Porting to POWER and remove assumption of X86 default [\#103](https://github.com/kokkos/kokkos/issues/103)
|
||||
- Dynamic scheduling option for RangePolicy [\#100](https://github.com/kokkos/kokkos/issues/100)
|
||||
- SharedMemory Support for Lambdas [\#81](https://github.com/kokkos/kokkos/issues/81)
|
||||
- Recommended TeamSize for Lambdas [\#80](https://github.com/kokkos/kokkos/issues/80)
|
||||
- Add Aggressive Vectorization Compilation mode [\#72](https://github.com/kokkos/kokkos/issues/72)
|
||||
- Dynamic scheduling team execution policy [\#53](https://github.com/kokkos/kokkos/issues/53)
|
||||
- UVM allocations in multi-GPU systems [\#50](https://github.com/kokkos/kokkos/issues/50)
|
||||
- Synchronic in Kokkos::Impl [\#44](https://github.com/kokkos/kokkos/issues/44)
|
||||
- index and dimension types in for loops [\#28](https://github.com/kokkos/kokkos/issues/28)
|
||||
- Subview assign of 1D Strided with stride 1 to LayoutLeft/Right [\#1](https://github.com/kokkos/kokkos/issues/1)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- misspelled variable name in Kokkos\_Atomic\_Fetch + missing unit tests [\#340](https://github.com/kokkos/kokkos/issues/340)
|
||||
- seg fault Kokkos::Impl::CudaInternal::print\_configuration [\#338](https://github.com/kokkos/kokkos/issues/338)
|
||||
- Clang compiler error with named parallel\_reduce, tags, and TeamPolicy. [\#335](https://github.com/kokkos/kokkos/issues/335)
|
||||
- Shared Memory Allocation Error at parallel\_reduce [\#311](https://github.com/kokkos/kokkos/issues/311)
|
||||
- DynRankView: Fix resize and realloc [\#303](https://github.com/kokkos/kokkos/issues/303)
|
||||
- Scratch memory and dynamic scheduling [\#279](https://github.com/kokkos/kokkos/issues/279)
|
||||
- MemoryPool infinite loop when out of memory [\#312](https://github.com/kokkos/kokkos/issues/312)
|
||||
- Kokkos DynRankView changes break Sacado and Panzer [\#299](https://github.com/kokkos/kokkos/issues/299)
|
||||
- MemoryPool fails to compile on non-cuda non-x86 [\#297](https://github.com/kokkos/kokkos/issues/297)
|
||||
- Random Number Generator Fix [\#296](https://github.com/kokkos/kokkos/issues/296)
|
||||
- View template parameter ordering Bug [\#282](https://github.com/kokkos/kokkos/issues/282)
|
||||
- Serial task policy broken. [\#281](https://github.com/kokkos/kokkos/issues/281)
|
||||
- deep\_copy with LayoutStride should not memcpy [\#262](https://github.com/kokkos/kokkos/issues/262)
|
||||
- DualView::need\_sync should be a const method [\#248](https://github.com/kokkos/kokkos/issues/248)
|
||||
- Arbitrary-sized atomics on GPUs broken; loop forever [\#238](https://github.com/kokkos/kokkos/issues/238)
|
||||
- boolean reduction value\_type changes answer [\#225](https://github.com/kokkos/kokkos/issues/225)
|
||||
- Custom init\(\) function for parallel\_reduce with array value\_type [\#210](https://github.com/kokkos/kokkos/issues/210)
|
||||
- unit\_test Makefile is Broken - Recursively Calls itself until Machine Apocalypse. [\#202](https://github.com/kokkos/kokkos/issues/202)
|
||||
- nvcc\_wrapper Does Not Support -Xcompiler \<compiler option\> [\#198](https://github.com/kokkos/kokkos/issues/198)
|
||||
- Kokkos exec space init should init Kokkos profiling [\#192](https://github.com/kokkos/kokkos/issues/192)
|
||||
- Kokkos Threads Backend impl\_shared\_alloc Broken on Intel 16.1 \(Shepard Haswell\) [\#186](https://github.com/kokkos/kokkos/issues/186)
|
||||
- pthread back end hangs if used uninitialized [\#182](https://github.com/kokkos/kokkos/issues/182)
|
||||
- parallel\_reduce of size 0, not calling init/join [\#175](https://github.com/kokkos/kokkos/issues/175)
|
||||
- Bug in Threads with OpenMP enabled [\#173](https://github.com/kokkos/kokkos/issues/173)
|
||||
- KokkosExp\_SharedAlloc, m\_team\_work\_index inaccessible [\#166](https://github.com/kokkos/kokkos/issues/166)
|
||||
- 128-bit CAS without Assembly Broken? [\#161](https://github.com/kokkos/kokkos/issues/161)
|
||||
- fatal error: Cuda/Kokkos\_Cuda\_abort.hpp: No such file or directory [\#157](https://github.com/kokkos/kokkos/issues/157)
|
||||
- Power8: Fix OpenMP backend [\#139](https://github.com/kokkos/kokkos/issues/139)
|
||||
- Data race in Kokkos OpenMP initialization [\#131](https://github.com/kokkos/kokkos/issues/131)
|
||||
- parallel\_launch\_local\_memory and cuda 7.5 [\#125](https://github.com/kokkos/kokkos/issues/125)
|
||||
- Resize can fail with Cuda due to asynchronous dispatch [\#119](https://github.com/kokkos/kokkos/issues/119)
|
||||
- Qthread taskpolicy initialization bug. [\#92](https://github.com/kokkos/kokkos/issues/92)
|
||||
- Windows: sys/mman.h [\#89](https://github.com/kokkos/kokkos/issues/89)
|
||||
- Windows: atomic\_fetch\_sub\(\) [\#88](https://github.com/kokkos/kokkos/issues/88)
|
||||
- Windows: snprintf [\#87](https://github.com/kokkos/kokkos/issues/87)
|
||||
- Parallel\_Reduce with TeamPolicy and league size of 0 returns garbage [\#85](https://github.com/kokkos/kokkos/issues/85)
|
||||
- Throw with Cuda when using \(2D\) team\_policy parallel\_reduce with less than a warp size [\#76](https://github.com/kokkos/kokkos/issues/76)
|
||||
- Scalar views don't work with Kokkos::Atomic memory trait [\#69](https://github.com/kokkos/kokkos/issues/69)
|
||||
- Reduce the number of threads per team for Cuda [\#63](https://github.com/kokkos/kokkos/issues/63)
|
||||
- Named Kernels fail for reductions with CUDA [\#60](https://github.com/kokkos/kokkos/issues/60)
|
||||
- Kokkos View dimension\_\(\) for long returning unsigned int [\#20](https://github.com/kokkos/kokkos/issues/20)
|
||||
- atomic test hangs with LLVM [\#6](https://github.com/kokkos/kokkos/issues/6)
|
||||
- OpenMP Test should set omp\_set\_num\_threads to 1 [\#4](https://github.com/kokkos/kokkos/issues/4)
|
||||
|
||||
**Closed issues:**
|
||||
|
||||
- develop branch broken with CUDA 8 and --expt-extended-lambda [\#354](https://github.com/kokkos/kokkos/issues/354)
|
||||
- --arch=KNL with Intel 2016 build failure [\#349](https://github.com/kokkos/kokkos/issues/349)
|
||||
- Error building with Cuda when passing -DKOKKOS\_CUDA\_USE\_LAMBDA to generate\_makefile.bash [\#343](https://github.com/kokkos/kokkos/issues/343)
|
||||
- Can I safely use int indices in a 2-D View with capacity \> 2B? [\#318](https://github.com/kokkos/kokkos/issues/318)
|
||||
- Kokkos::ViewAllocateWithoutInitializing is not working [\#317](https://github.com/kokkos/kokkos/issues/317)
|
||||
- Intel build on Mac OS X [\#277](https://github.com/kokkos/kokkos/issues/277)
|
||||
- deleted [\#271](https://github.com/kokkos/kokkos/issues/271)
|
||||
- Broken Mira build [\#268](https://github.com/kokkos/kokkos/issues/268)
|
||||
- 32-bit build [\#246](https://github.com/kokkos/kokkos/issues/246)
|
||||
- parallel\_reduce with RDC crashes linker [\#232](https://github.com/kokkos/kokkos/issues/232)
|
||||
- build of Kokkos\_Sparse\_MV\_impl\_spmv\_Serial.cpp.o fails if you use nvcc and have cuda disabled [\#209](https://github.com/kokkos/kokkos/issues/209)
|
||||
- Kokkos Serial execution space is not tested with TeamPolicy. [\#207](https://github.com/kokkos/kokkos/issues/207)
|
||||
- Unit test failure on Hansen KokkosCore\_UnitTest\_Cuda\_MPI\_1 [\#200](https://github.com/kokkos/kokkos/issues/200)
|
||||
- nvcc compiler warning: calling a \_\_host\_\_ function from a \_\_host\_\_ \_\_device\_\_ function is not allowed [\#180](https://github.com/kokkos/kokkos/issues/180)
|
||||
- Intel 15 build error with defaulted "move" operators [\#171](https://github.com/kokkos/kokkos/issues/171)
|
||||
- missing libkokkos.a during Trilinos 12.4.2 build, yet other libkokkos\*.a libs are there [\#165](https://github.com/kokkos/kokkos/issues/165)
|
||||
- Tie atomic updates to execution space or even to thread team? \(speculation\) [\#144](https://github.com/kokkos/kokkos/issues/144)
|
||||
- New View: Compiletime/size Test [\#137](https://github.com/kokkos/kokkos/issues/137)
|
||||
- New View : Performance Test [\#136](https://github.com/kokkos/kokkos/issues/136)
|
||||
- Signed/unsigned comparison warning in CUDA parallel [\#130](https://github.com/kokkos/kokkos/issues/130)
|
||||
- Kokkos::complex: Need op\* w/ std::complex & real [\#126](https://github.com/kokkos/kokkos/issues/126)
|
||||
- Use uintptr\_t for casting pointers [\#110](https://github.com/kokkos/kokkos/issues/110)
|
||||
- Default thread mapping behavior between P and Q threads. [\#91](https://github.com/kokkos/kokkos/issues/91)
|
||||
- Windows: Atomic\_Fetch\_Exchange\(\) return type [\#90](https://github.com/kokkos/kokkos/issues/90)
|
||||
- Synchronic unit test is way too long [\#84](https://github.com/kokkos/kokkos/issues/84)
|
||||
- nvcc\_wrapper -\> $\(NVCC\_WRAPPER\) [\#42](https://github.com/kokkos/kokkos/issues/42)
|
||||
- Check compiler version and print helpful message [\#39](https://github.com/kokkos/kokkos/issues/39)
|
||||
- Kokkos shared memory on Cuda uses a lot of registers [\#31](https://github.com/kokkos/kokkos/issues/31)
|
||||
- Can not pass unit test `cuda.space` without a GT 720 [\#25](https://github.com/kokkos/kokkos/issues/25)
|
||||
- Makefile.kokkos lacks bounds checking option that CMake has [\#24](https://github.com/kokkos/kokkos/issues/24)
|
||||
- Kokkos can not complete unit tests with CUDA UVM enabled [\#23](https://github.com/kokkos/kokkos/issues/23)
|
||||
- Simplify teams + shared memory histogram example to remove vectorization [\#21](https://github.com/kokkos/kokkos/issues/21)
|
||||
- Kokkos needs to rever to ${PROJECT\_NAME}\_ENABLE\_CXX11 not Trilinos\_ENABLE\_CXX11 [\#17](https://github.com/kokkos/kokkos/issues/17)
|
||||
- Kokkos Base Makefile adds AVX to KNC Build [\#16](https://github.com/kokkos/kokkos/issues/16)
|
||||
- MS Visual Studio 2013 Build Errors [\#9](https://github.com/kokkos/kokkos/issues/9)
|
||||
- subview\(X, ALL\(\), j\) for 2-D LayoutRight View X: should it view a column? [\#5](https://github.com/kokkos/kokkos/issues/5)
|
||||
|
||||
## [End_C++98](https://github.com/kokkos/kokkos/tree/End_C++98) (2015-04-15)
|
||||
|
||||
|
||||
\* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)*
|
||||
@ -34,8 +34,8 @@ TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS)
|
||||
# for compatibility with Kokkos' Makefile build system.
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
${PACKAGE_NAME}_ENABLE_DEBUG
|
||||
${PACKAGE_NAME_UC}_HAVE_DEBUG
|
||||
Kokkos_ENABLE_DEBUG
|
||||
KOKKOS_HAVE_DEBUG
|
||||
"Enable run-time debug checks. These checks may be expensive, so they are disabled by default in a release build."
|
||||
${${PROJECT_NAME}_ENABLE_DEBUG}
|
||||
)
|
||||
@ -57,7 +57,21 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Cuda_UVM
|
||||
KOKKOS_USE_CUDA_UVM
|
||||
"Enable CUDA Unified Virtual Memory support in Kokkos."
|
||||
"Enable CUDA Unified Virtual Memory as the default in Kokkos."
|
||||
OFF
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Cuda_RDC
|
||||
KOKKOS_HAVE_CUDA_RDC
|
||||
"Enable CUDA Relocatable Device Code support in Kokkos."
|
||||
OFF
|
||||
)
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_Cuda_Lambda
|
||||
KOKKOS_HAVE_CUDA_LAMBDA
|
||||
"Enable CUDA LAMBDA support in Kokkos."
|
||||
OFF
|
||||
)
|
||||
|
||||
@ -72,6 +86,9 @@ ASSERT_DEFINED(TPL_ENABLE_Pthread)
|
||||
IF (Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread)
|
||||
MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF). This is not allowed. Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.")
|
||||
ENDIF ()
|
||||
IF (NOT TPL_ENABLE_Pthread)
|
||||
ADD_DEFINITIONS(-DGTEST_HAS_PTHREAD=0)
|
||||
ENDIF()
|
||||
|
||||
TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
Kokkos_ENABLE_OpenMP
|
||||
@ -162,13 +179,28 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
#
|
||||
# C) Process the subpackages for Kokkos
|
||||
# C) Install Kokkos' executable scripts
|
||||
#
|
||||
|
||||
|
||||
# nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler.
|
||||
# Kokkos needs nvcc_wrapper in order to build. Other libraries and
|
||||
# executables also need nvcc_wrapper. Thus, we need to install it.
|
||||
# If the argument of DESTINATION is a relative path, CMake computes it
|
||||
# as relative to ${CMAKE_INSTALL_PREFIX}.
|
||||
|
||||
INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin)
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
#
|
||||
# D) Process the subpackages for Kokkos
|
||||
#
|
||||
|
||||
TRIBITS_PROCESS_SUBPACKAGES()
|
||||
|
||||
#
|
||||
# D) If Kokkos itself is enabled, process the Kokkos package
|
||||
# E) If Kokkos itself is enabled, process the Kokkos package
|
||||
#
|
||||
|
||||
TRIBITS_PACKAGE_DEF()
|
||||
|
||||
@ -7,25 +7,26 @@ CXXFLAGS=$(CCFLAGS)
|
||||
#Options: OpenMP,Serial,Pthreads,Cuda
|
||||
KOKKOS_DEVICES ?= "OpenMP"
|
||||
#KOKKOS_DEVICES ?= "Pthreads"
|
||||
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv8,BGQ,Power7,Power8,KNL,BDW
|
||||
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,KNL,BDW,SKX
|
||||
KOKKOS_ARCH ?= ""
|
||||
#Options: yes,no
|
||||
KOKKOS_DEBUG ?= "no"
|
||||
#Options: hwloc,librt,experimental_memkind
|
||||
KOKKOS_USE_TPLS ?= ""
|
||||
#Options: c++11
|
||||
#Options: c++11,c++1z
|
||||
KOKKOS_CXX_STANDARD ?= "c++11"
|
||||
#Options: aggressive_vectorization,disable_profiling
|
||||
KOKKOS_OPTIONS ?= ""
|
||||
|
||||
#Default settings specific options
|
||||
#Options: force_uvm,use_ldg,rdc,enable_lambda
|
||||
KOKKOS_CUDA_OPTIONS ?= ""
|
||||
KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
|
||||
|
||||
# Check for general settings
|
||||
|
||||
KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
|
||||
KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
|
||||
KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l))
|
||||
|
||||
# Check for external libraries
|
||||
KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
|
||||
@ -53,23 +54,71 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
|
||||
endif
|
||||
endif
|
||||
|
||||
# Check for other Execution Spaces
|
||||
|
||||
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
|
||||
CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .)
|
||||
endif
|
||||
|
||||
# Check OS
|
||||
|
||||
KOKKOS_OS := $(shell uname -s)
|
||||
KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname -s | grep CYGWIN | wc -l)
|
||||
KOKKOS_INTERNAL_OS_LINUX := $(shell uname -s | grep Linux | wc -l)
|
||||
KOKKOS_INTERNAL_OS_DARWIN := $(shell uname -s | grep Darwin | wc -l)
|
||||
|
||||
# Check compiler
|
||||
|
||||
KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)
|
||||
KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname | grep CYGWIN | wc -l)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l)
|
||||
ifneq ($(OMPI_CXX),)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l)
|
||||
endif
|
||||
ifneq ($(MPICH_CXX),)
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l)
|
||||
endif
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG = 1
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 2)
|
||||
KOKKOS_INTERNAL_COMPILER_XL = 1
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.')
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0)
|
||||
$(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher)
|
||||
endif
|
||||
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG := -mp
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
# OpenMP is turned on by default in Cray compiler environment
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG :=
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
|
||||
else
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
# OpenMP is turned on by default in Cray compiler environment
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG :=
|
||||
else
|
||||
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
@ -84,13 +133,11 @@ else
|
||||
KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
|
||||
else
|
||||
KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
|
||||
KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Check for other Execution Spaces
|
||||
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
|
||||
|
||||
# Check for Kokkos Architecture settings
|
||||
|
||||
#Intel based
|
||||
@ -98,6 +145,7 @@ KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC |
|
||||
KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))
|
||||
|
||||
#NVIDIA based
|
||||
@ -110,11 +158,13 @@ KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal60 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
||||
@ -127,13 +177,16 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_AR
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
|
||||
endif
|
||||
|
||||
#ARM based
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l))
|
||||
|
||||
#IBM based
|
||||
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
|
||||
@ -145,17 +198,18 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
|
||||
|
||||
#Any AVX?
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
|
||||
# Decide what ISA level we are able to support
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc ))
|
||||
|
||||
#Incompatible flags?
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc ))
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
|
||||
@ -207,15 +261,21 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
|
||||
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
|
||||
@ -230,9 +290,15 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
KOKKOS_CXXFLAGS += -G
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
KOKKOS_CXXFLAGS += -lineinfo
|
||||
endif
|
||||
KOKKOS_CXXFLAGS += -g
|
||||
KOKKOS_LDFLAGS += -g -ldl
|
||||
@ -273,13 +339,14 @@ endif
|
||||
|
||||
tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
|
||||
@ -289,27 +356,101 @@ ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -expt-extended-lambda
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -expt-extended-lambda
|
||||
else
|
||||
$(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
|
||||
endif
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
#Add Architecture flags
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -mavx
|
||||
KOKKOS_LDFLAGS += -mavx
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -march=armv8-a
|
||||
KOKKOS_LDFLAGS += -march=armv8-a
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -march=armv8.1-a
|
||||
KOKKOS_LDFLAGS += -march=armv8.1-a
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx
|
||||
KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -mavx
|
||||
KOKKOS_LDFLAGS += -mavx
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS += -tp=sandybridge
|
||||
KOKKOS_LDFLAGS += -tp=sandybridge
|
||||
else
|
||||
# Assume that this is a really a GNU compiler
|
||||
KOKKOS_CXXFLAGS += -mavx
|
||||
KOKKOS_LDFLAGS += -mavx
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
|
||||
KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
else
|
||||
# Assume that this is a really a GNU compiler or it could be XL on P8
|
||||
KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
|
||||
KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
|
||||
@ -322,7 +463,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
KOKKOS_CXXFLAGS += -tp=haswell
|
||||
KOKKOS_LDFLAGS += -tp=haswell
|
||||
else
|
||||
# Assume that this is a really a GNU compiler
|
||||
KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2
|
||||
@ -352,52 +494,85 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xCORE-AVX512
|
||||
KOKKOS_LDFLAGS += -xCORE-AVX512
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
else
|
||||
# Nothing here yet
|
||||
KOKKOS_CXXFLAGS += -march=skylake-avx512
|
||||
KOKKOS_LDFLAGS += -march=skylake-avx512
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -mmic
|
||||
KOKKOS_LDFLAGS += -mmic
|
||||
endif
|
||||
|
||||
#Figure out the architecture flag for Cuda
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-arch
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-x cuda --cuda-gpu-arch
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -arch=sm_30
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -arch=sm_32
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -arch=sm_35
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -arch=sm_37
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -arch=sm_50
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -arch=sm_52
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -arch=sm_53
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += -arch=sm_61
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp )
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60
|
||||
endif
|
||||
endif
|
||||
|
||||
@ -424,6 +599,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
|
||||
KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
|
||||
KOKKOS_LIBS += -lcudart -lcuda
|
||||
endif
|
||||
@ -443,7 +619,7 @@ endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||
else
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||
@ -451,6 +627,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||
endif
|
||||
|
||||
#Explicitly set the GCC Toolchain for Clang
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_GCC_PATH = $(shell which g++)
|
||||
KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=)
|
||||
KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC
|
||||
KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
|
||||
endif
|
||||
|
||||
#With Cygwin functions such as fdopen and fileno are not defined
|
||||
#when strict ansi is enabled. strict ansi gets enabled with --std=c++11
|
||||
#though. So we hard undefine it here. Not sure if that has any bad side effects
|
||||
@ -471,7 +655,7 @@ KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))
|
||||
include $(KOKKOS_PATH)/Makefile.targets
|
||||
|
||||
kokkos-clean:
|
||||
-rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
|
||||
rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
|
||||
|
||||
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
|
||||
ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
|
||||
|
||||
@ -14,20 +14,16 @@ Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
|
||||
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
|
||||
Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
|
||||
Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
|
||||
Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
|
||||
Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
|
||||
Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
|
||||
Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
|
||||
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
|
||||
KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
|
||||
Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
|
||||
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
|
||||
|
||||
@ -38,8 +34,6 @@ Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
|
||||
Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
|
||||
Kokkos_Cuda_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
@ -47,8 +41,6 @@ Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
|
||||
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||
Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
|
||||
@ -67,6 +59,4 @@ endif
|
||||
|
||||
Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
|
||||
Kokkos_HBWAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp
|
||||
|
||||
|
||||
@ -45,31 +45,32 @@ Primary tested compilers on X86 are:
|
||||
Intel 14.0.4
|
||||
Intel 15.0.2
|
||||
Intel 16.0.1
|
||||
Intel 17.0.098
|
||||
Clang 3.5.2
|
||||
Clang 3.6.1
|
||||
Clang 3.9.0
|
||||
|
||||
Primary tested compilers on Power 8 are:
|
||||
IBM XL 13.1.3 (OpenMP,Serial)
|
||||
GCC 4.9.2 (OpenMP,Serial)
|
||||
GCC 5.3.0 (OpenMP,Serial)
|
||||
GCC 5.4.0 (OpenMP,Serial)
|
||||
IBM XL 13.1.3 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug)
|
||||
|
||||
Primary tested compilers on Intel KNL are:
|
||||
Intel 16.2.181 (with gcc 4.7.2)
|
||||
Intel 17.0.098 (with gcc 4.7.2)
|
||||
|
||||
Secondary tested compilers are:
|
||||
CUDA 6.5 (with gcc 4.7.2)
|
||||
CUDA 7.0 (with gcc 4.7.2)
|
||||
CUDA 7.5 (with gcc 4.8.4)
|
||||
CUDA 7.5 (with gcc 4.7.2)
|
||||
CUDA 8.0 (with gcc 5.3.0 on X86 and gcc 5.4.0 on Power8)
|
||||
CUDA/Clang 8.0 using Clang/Trunk compiler
|
||||
|
||||
Other compilers working:
|
||||
X86:
|
||||
Intel 17.0.042 (the FENL example causes internal compiler error)
|
||||
PGI 15.4
|
||||
Cygwin 2.1.0 64bit with gcc 4.9.3
|
||||
KNL:
|
||||
Intel 16.2.181 (the FENL example causes internal compiler error)
|
||||
Intel 17.0.042 (the FENL example causes internal compiler error)
|
||||
|
||||
Known non-working combinations:
|
||||
Power8:
|
||||
GCC 6.1.0
|
||||
Pthreads backend
|
||||
|
||||
|
||||
@ -92,9 +93,10 @@ master branch, without -Werror and only for a select set of backends.
|
||||
|
||||
In the 'example/tutorial' directory you will find step by step tutorial
|
||||
examples which explain many of the features of Kokkos. They work with
|
||||
simple Makefiles. To build with g++ and OpenMP simply type 'make openmp'
|
||||
simple Makefiles. To build with g++ and OpenMP simply type 'make'
|
||||
in the 'example/tutorial' directory. This will build all examples in the
|
||||
subfolders.
|
||||
subfolders. To change the build options refer to the Programming Guide
|
||||
in the compilation section.
|
||||
|
||||
============================================================================
|
||||
====Running Unit Tests======================================================
|
||||
|
||||
@ -476,54 +476,54 @@ namespace Kokkos {
|
||||
};
|
||||
|
||||
template<class Generator>
|
||||
struct rand<Generator, ::Kokkos::complex<float> > {
|
||||
struct rand<Generator, Kokkos::complex<float> > {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<float> max () {
|
||||
return ::Kokkos::complex<float> (1.0, 1.0);
|
||||
static Kokkos::complex<float> max () {
|
||||
return Kokkos::complex<float> (1.0, 1.0);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<float> draw (Generator& gen) {
|
||||
static Kokkos::complex<float> draw (Generator& gen) {
|
||||
const float re = gen.frand ();
|
||||
const float im = gen.frand ();
|
||||
return ::Kokkos::complex<float> (re, im);
|
||||
return Kokkos::complex<float> (re, im);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& range) {
|
||||
static Kokkos::complex<float> draw (Generator& gen, const Kokkos::complex<float>& range) {
|
||||
const float re = gen.frand (real (range));
|
||||
const float im = gen.frand (imag (range));
|
||||
return ::Kokkos::complex<float> (re, im);
|
||||
return Kokkos::complex<float> (re, im);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& start, const ::Kokkos::complex<float>& end) {
|
||||
static Kokkos::complex<float> draw (Generator& gen, const Kokkos::complex<float>& start, const Kokkos::complex<float>& end) {
|
||||
const float re = gen.frand (real (start), real (end));
|
||||
const float im = gen.frand (imag (start), imag (end));
|
||||
return ::Kokkos::complex<float> (re, im);
|
||||
return Kokkos::complex<float> (re, im);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Generator>
|
||||
struct rand<Generator, ::Kokkos::complex<double> > {
|
||||
struct rand<Generator, Kokkos::complex<double> > {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<double> max () {
|
||||
return ::Kokkos::complex<double> (1.0, 1.0);
|
||||
static Kokkos::complex<double> max () {
|
||||
return Kokkos::complex<double> (1.0, 1.0);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<double> draw (Generator& gen) {
|
||||
static Kokkos::complex<double> draw (Generator& gen) {
|
||||
const double re = gen.drand ();
|
||||
const double im = gen.drand ();
|
||||
return ::Kokkos::complex<double> (re, im);
|
||||
return Kokkos::complex<double> (re, im);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& range) {
|
||||
static Kokkos::complex<double> draw (Generator& gen, const Kokkos::complex<double>& range) {
|
||||
const double re = gen.drand (real (range));
|
||||
const double im = gen.drand (imag (range));
|
||||
return ::Kokkos::complex<double> (re, im);
|
||||
return Kokkos::complex<double> (re, im);
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& start, const ::Kokkos::complex<double>& end) {
|
||||
static Kokkos::complex<double> draw (Generator& gen, const Kokkos::complex<double>& start, const Kokkos::complex<double>& end) {
|
||||
const double re = gen.drand (real (start), real (end));
|
||||
const double im = gen.drand (imag (start), imag (end));
|
||||
return ::Kokkos::complex<double> (re, im);
|
||||
return Kokkos::complex<double> (re, im);
|
||||
}
|
||||
};
|
||||
|
||||
@ -670,8 +670,8 @@ namespace Kokkos {
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while(S>=1.0) {
|
||||
U = drand();
|
||||
const double V = drand();
|
||||
U = 2.0*drand() - 1.0;
|
||||
const double V = 2.0*drand() - 1.0;
|
||||
S = U*U+V*V;
|
||||
}
|
||||
return U*sqrt(-2.0*log(S)/S);
|
||||
@ -910,8 +910,8 @@ namespace Kokkos {
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while(S>=1.0) {
|
||||
U = drand();
|
||||
const double V = drand();
|
||||
U = 2.0*drand() - 1.0;
|
||||
const double V = 2.0*drand() - 1.0;
|
||||
S = U*U+V*V;
|
||||
}
|
||||
return U*sqrt(-2.0*log(S)/S);
|
||||
@ -1163,8 +1163,8 @@ namespace Kokkos {
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while(S>=1.0) {
|
||||
U = drand();
|
||||
const double V = drand();
|
||||
U = 2.0*drand() - 1.0;
|
||||
const double V = 2.0*drand() - 1.0;
|
||||
S = U*U+V*V;
|
||||
}
|
||||
return U*sqrt(-2.0*log(S)/S);
|
||||
|
||||
@ -51,7 +51,7 @@
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
namespace SortImpl {
|
||||
namespace Impl {
|
||||
|
||||
template<class ValuesViewType, int Rank=ValuesViewType::Rank>
|
||||
struct CopyOp;
|
||||
@ -199,7 +199,7 @@ public:
|
||||
|
||||
parallel_for(values.dimension_0(),
|
||||
bin_sort_sort_functor<ValuesViewType, offset_type,
|
||||
SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
|
||||
Impl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
|
||||
|
||||
deep_copy(values,sorted_values);
|
||||
}
|
||||
@ -262,17 +262,15 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
namespace SortImpl {
|
||||
|
||||
template<class KeyViewType>
|
||||
struct DefaultBinOp1D {
|
||||
struct BinOp1D {
|
||||
const int max_bins_;
|
||||
const double mul_;
|
||||
typename KeyViewType::const_value_type range_;
|
||||
typename KeyViewType::const_value_type min_;
|
||||
|
||||
//Construct BinOp with number of bins, minimum value and maximum value
|
||||
DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
|
||||
BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
|
||||
typename KeyViewType::const_value_type max )
|
||||
:max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {}
|
||||
|
||||
@ -298,13 +296,13 @@ struct DefaultBinOp1D {
|
||||
};
|
||||
|
||||
template<class KeyViewType>
|
||||
struct DefaultBinOp3D {
|
||||
struct BinOp3D {
|
||||
int max_bins_[3];
|
||||
double mul_[3];
|
||||
typename KeyViewType::non_const_value_type range_[3];
|
||||
typename KeyViewType::non_const_value_type min_[3];
|
||||
|
||||
DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
|
||||
BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
|
||||
typename KeyViewType::const_value_type max[] )
|
||||
{
|
||||
max_bins_[0] = max_bins__[0]+1;
|
||||
@ -348,109 +346,11 @@ struct DefaultBinOp3D {
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar>
|
||||
struct min_max {
|
||||
Scalar min;
|
||||
Scalar max;
|
||||
bool init;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
min_max() {
|
||||
min = 0;
|
||||
max = 0;
|
||||
init = 0;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
min_max (const min_max& val) {
|
||||
min = val.min;
|
||||
max = val.max;
|
||||
init = val.init;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
min_max operator = (const min_max& val) {
|
||||
min = val.min;
|
||||
max = val.max;
|
||||
init = val.init;
|
||||
return *this;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator+= (const Scalar& val) {
|
||||
if(init) {
|
||||
min = min<val?min:val;
|
||||
max = max>val?max:val;
|
||||
} else {
|
||||
min = val;
|
||||
max = val;
|
||||
init = 1;
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator+= (const min_max& val) {
|
||||
if(init && val.init) {
|
||||
min = min<val.min?min:val.min;
|
||||
max = max>val.max?max:val.max;
|
||||
} else {
|
||||
if(val.init) {
|
||||
min = val.min;
|
||||
max = val.max;
|
||||
init = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator+= (volatile const Scalar& val) volatile {
|
||||
if(init) {
|
||||
min = min<val?min:val;
|
||||
max = max>val?max:val;
|
||||
} else {
|
||||
min = val;
|
||||
max = val;
|
||||
init = 1;
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator+= (volatile const min_max& val) volatile {
|
||||
if(init && val.init) {
|
||||
min = min<val.min?min:val.min;
|
||||
max = max>val.max?max:val.max;
|
||||
} else {
|
||||
if(val.init) {
|
||||
min = val.min;
|
||||
max = val.max;
|
||||
init = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<class ViewType>
|
||||
struct min_max_functor {
|
||||
typedef typename ViewType::execution_space execution_space;
|
||||
ViewType view;
|
||||
typedef min_max<typename ViewType::non_const_value_type> value_type;
|
||||
min_max_functor (const ViewType view_):view(view_) {
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const size_t& i, value_type& val) const {
|
||||
val += view(i);
|
||||
}
|
||||
};
|
||||
namespace Impl {
|
||||
|
||||
template<class ViewType>
|
||||
bool try_std_sort(ViewType view) {
|
||||
bool possible = true;
|
||||
#if ! KOKKOS_USING_EXP_VIEW
|
||||
size_t stride[8];
|
||||
view.stride(stride);
|
||||
#else
|
||||
size_t stride[8] = { view.stride_0()
|
||||
, view.stride_1()
|
||||
, view.stride_2()
|
||||
@ -460,8 +360,7 @@ bool try_std_sort(ViewType view) {
|
||||
, view.stride_6()
|
||||
, view.stride_7()
|
||||
};
|
||||
#endif
|
||||
possible = possible && Impl::is_same<typename ViewType::memory_space, HostSpace>::value;
|
||||
possible = possible && std::is_same<typename ViewType::memory_space, HostSpace>::value;
|
||||
possible = possible && (ViewType::Rank == 1);
|
||||
possible = possible && (stride[0] == 1);
|
||||
if(possible) {
|
||||
@ -470,27 +369,39 @@ bool try_std_sort(ViewType view) {
|
||||
return possible;
|
||||
}
|
||||
|
||||
template<class ViewType>
|
||||
struct min_max_functor {
|
||||
typedef Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> minmax_scalar;
|
||||
|
||||
ViewType view;
|
||||
min_max_functor(const ViewType& view_):view(view_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const size_t& i, minmax_scalar& minmax) const {
|
||||
if(view(i) < minmax.min_val) minmax.min_val = view(i);
|
||||
if(view(i) > minmax.max_val) minmax.max_val = view(i);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
template<class ViewType>
|
||||
void sort(ViewType view, bool always_use_kokkos_sort = false) {
|
||||
if(!always_use_kokkos_sort) {
|
||||
if(SortImpl::try_std_sort(view)) return;
|
||||
if(Impl::try_std_sort(view)) return;
|
||||
}
|
||||
typedef BinOp1D<ViewType> CompType;
|
||||
|
||||
typedef SortImpl::DefaultBinOp1D<ViewType> CompType;
|
||||
SortImpl::min_max<typename ViewType::non_const_value_type> val;
|
||||
parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val);
|
||||
BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true);
|
||||
Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
|
||||
Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
|
||||
parallel_reduce(Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.dimension_0()),
|
||||
Impl::min_max_functor<ViewType>(view),reducer);
|
||||
if(result.min_val == result.max_val) return;
|
||||
BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,result.min_val,result.max_val),true);
|
||||
bin_sort.create_permute_vector();
|
||||
bin_sort.sort(view);
|
||||
}
|
||||
|
||||
/*template<class ViewType, class Comparator>
|
||||
void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) {
|
||||
|
||||
}*/
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
|
||||
|
||||
SET(SOURCES
|
||||
|
||||
@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests
|
||||
default: build_all
|
||||
echo "End Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
|
||||
else
|
||||
CXX = g++
|
||||
endif
|
||||
|
||||
CXXFLAGS = -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
CXX = $(NVCC_WRAPPER)
|
||||
CXXFLAGS ?= -O3
|
||||
LINK = $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
else
|
||||
CXX ?= g++
|
||||
CXXFLAGS ?= -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
endif
|
||||
|
||||
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests
|
||||
|
||||
TEST_TARGETS =
|
||||
|
||||
@ -131,6 +131,10 @@ void test_1D_sort(unsigned int n,bool force_kokkos) {
|
||||
typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
|
||||
KeyViewType keys("Keys",n);
|
||||
|
||||
// Test sorting array with all numbers equal
|
||||
Kokkos::deep_copy(keys,KeyType(1));
|
||||
Kokkos::sort(keys,force_kokkos);
|
||||
|
||||
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
|
||||
Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);
|
||||
|
||||
@ -174,7 +178,7 @@ void test_3D_sort(unsigned int n) {
|
||||
typename KeyViewType::value_type min[3] = {0,0,0};
|
||||
typename KeyViewType::value_type max[3] = {100,100,100};
|
||||
|
||||
typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp;
|
||||
typedef Kokkos::BinOp3D< KeyViewType > BinOp;
|
||||
BinOp bin_op(bin_max,min,max);
|
||||
Kokkos::BinSort< KeyViewType , BinOp >
|
||||
Sorter(keys,bin_op,false);
|
||||
|
||||
43
lib/kokkos/benchmarks/bytes_and_flops/Makefile
Normal file
43
lib/kokkos/benchmarks/bytes_and_flops/Makefile
Normal file
@ -0,0 +1,43 @@
|
||||
KOKKOS_PATH = ${HOME}/kokkos
|
||||
SRC = $(wildcard *.cpp)
|
||||
KOKKOS_DEVICES=Cuda
|
||||
KOKKOS_CUDA_OPTIONS=enable_lambda
|
||||
|
||||
default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
|
||||
EXE = bytes_and_flops.cuda
|
||||
KOKKOS_DEVICES = "Cuda,OpenMP"
|
||||
KOKKOS_ARCH = "SNB,Kepler35"
|
||||
else
|
||||
CXX = g++
|
||||
EXE = bytes_and_flops.host
|
||||
KOKKOS_DEVICES = "OpenMP"
|
||||
KOKKOS_ARCH = "SNB"
|
||||
endif
|
||||
|
||||
CXXFLAGS = -O3 -g
|
||||
|
||||
DEPFLAGS = -M
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
OBJ = $(SRC:.cpp=.o)
|
||||
LIB =
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
build: $(EXE)
|
||||
|
||||
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
|
||||
|
||||
clean: kokkos-clean
|
||||
rm -f *.o *.cuda *.host
|
||||
|
||||
# Compilation rules
|
||||
|
||||
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) bench.hpp bench_unroll_stride.hpp bench_stride.hpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
99
lib/kokkos/benchmarks/bytes_and_flops/bench.hpp
Normal file
99
lib/kokkos/benchmarks/bytes_and_flops/bench.hpp
Normal file
@ -0,0 +1,99 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include<Kokkos_Core.hpp>
|
||||
#include<impl/Kokkos_Timer.hpp>
|
||||
|
||||
// Primary template of the benchmark kernel, parameterised on the scalar type,
// the unroll factor and the stride. Only the interface is declared here; no
// definition of run() is given for the primary template in this header.
//
// run() arguments follow the command-line naming used by the benchmark:
// N, K (array dimensions), R (repeats), F (inner repetitions), T (team size),
// S (per-team scratch/shared memory).
template<class Scalar, int Unroll,int Stride>
|
||||
struct Run {
|
||||
static void run(int N, int K, int R, int F, int T, int S);
|
||||
};
|
||||
|
||||
// Primary template that groups the benchmark entry points for one stride.
// run_1 .. run_8 each take the kernel arguments (N,K,R,F,T,S); run() takes an
// additional runtime unroll factor U. Only declarations are given here.
template<class Scalar, int Stride>
|
||||
struct RunStride {
|
||||
static void run_1(int N, int K, int R, int F, int T, int S);
|
||||
static void run_2(int N, int K, int R, int F, int T, int S);
|
||||
static void run_3(int N, int K, int R, int F, int T, int S);
|
||||
static void run_4(int N, int K, int R, int F, int T, int S);
|
||||
static void run_5(int N, int K, int R, int F, int T, int S);
|
||||
static void run_6(int N, int K, int R, int F, int T, int S);
|
||||
static void run_7(int N, int K, int R, int F, int T, int S);
|
||||
static void run_8(int N, int K, int R, int F, int T, int S);
|
||||
static void run(int N, int K, int R, int U, int F, int T, int S);
|
||||
};
|
||||
|
||||
#define STRIDE 1
|
||||
#include<bench_stride.hpp>
|
||||
#undef STRIDE
|
||||
#define STRIDE 2
|
||||
#include<bench_stride.hpp>
|
||||
#undef STRIDE
|
||||
#define STRIDE 4
|
||||
#include<bench_stride.hpp>
|
||||
#undef STRIDE
|
||||
#define STRIDE 8
|
||||
#include<bench_stride.hpp>
|
||||
#undef STRIDE
|
||||
#define STRIDE 16
|
||||
#include<bench_stride.hpp>
|
||||
#undef STRIDE
|
||||
#define STRIDE 32
|
||||
#include<bench_stride.hpp>
|
||||
#undef STRIDE
|
||||
|
||||
template<class Scalar>
|
||||
void run_stride_unroll(int N, int K, int R, int D, int U, int F, int T, int S) {
|
||||
if(D == 1)
|
||||
RunStride<Scalar,1>::run(N,K,R,U,F,T,S);
|
||||
if(D == 2)
|
||||
RunStride<Scalar,2>::run(N,K,R,U,F,T,S);
|
||||
if(D == 4)
|
||||
RunStride<Scalar,4>::run(N,K,R,U,F,T,S);
|
||||
if(D == 8)
|
||||
RunStride<Scalar,8>::run(N,K,R,U,F,T,S);
|
||||
if(D == 16)
|
||||
RunStride<Scalar,16>::run(N,K,R,U,F,T,S);
|
||||
if(D == 32)
|
||||
RunStride<Scalar,32>::run(N,K,R,U,F,T,S);
|
||||
}
|
||||
|
||||
124
lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp
Normal file
124
lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp
Normal file
@ -0,0 +1,124 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
|
||||
#define UNROLL 1
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 2
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 3
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 4
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 5
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 6
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 7
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 8
|
||||
#include<bench_unroll_stride.hpp>
|
||||
#undef UNROLL
|
||||
|
||||
template<class Scalar>
|
||||
struct RunStride<Scalar,STRIDE> {
|
||||
static void run_1(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,1,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_2(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,2,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_3(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,3,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_4(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,4,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_5(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,5,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_6(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,6,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_7(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,7,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
static void run_8(int N, int K, int R, int F, int T, int S) {
|
||||
Run<Scalar,8,STRIDE>::run(N,K,R,F,T,S);
|
||||
}
|
||||
|
||||
static void run(int N, int K, int R, int U, int F, int T, int S) {
|
||||
if(U==1) {
|
||||
run_1(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==2) {
|
||||
run_2(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==3) {
|
||||
run_3(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==4) {
|
||||
run_4(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==5) {
|
||||
run_5(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==6) {
|
||||
run_6(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==7) {
|
||||
run_7(N,K,R,F,T,S);
|
||||
}
|
||||
if(U==8) {
|
||||
run_8(N,K,R,F,T,S);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
148
lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp
Normal file
148
lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp
Normal file
@ -0,0 +1,148 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
template<class Scalar>
|
||||
struct Run<Scalar,UNROLL,STRIDE> {
|
||||
static void run(int N, int K, int R, int F, int T, int S) {
|
||||
Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> A("A",N,K);
|
||||
Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> B("B",N,K);
|
||||
Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> C("C",N,K);
|
||||
|
||||
Kokkos::deep_copy(A,Scalar(1.5));
|
||||
Kokkos::deep_copy(B,Scalar(2.5));
|
||||
Kokkos::deep_copy(C,Scalar(3.5));
|
||||
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for("BenchmarkKernel",Kokkos::TeamPolicy<>(N,T).set_scratch_size(0,Kokkos::PerTeam(S)),
|
||||
KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type& team) {
|
||||
const int n = team.league_rank();
|
||||
for(int r=0; r<R; r++) {
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,K), [&] (const int& i) {
|
||||
Scalar a1 = A(n,i,0);
|
||||
const Scalar b = B(n,i,0);
|
||||
#if(UNROLL>1)
|
||||
Scalar a2 = a1*1.3;
|
||||
#endif
|
||||
#if(UNROLL>2)
|
||||
Scalar a3 = a2*1.1;
|
||||
#endif
|
||||
#if(UNROLL>3)
|
||||
Scalar a4 = a3*1.1;
|
||||
#endif
|
||||
#if(UNROLL>4)
|
||||
Scalar a5 = a4*1.3;
|
||||
#endif
|
||||
#if(UNROLL>5)
|
||||
Scalar a6 = a5*1.1;
|
||||
#endif
|
||||
#if(UNROLL>6)
|
||||
Scalar a7 = a6*1.1;
|
||||
#endif
|
||||
#if(UNROLL>7)
|
||||
Scalar a8 = a7*1.1;
|
||||
#endif
|
||||
|
||||
|
||||
for(int f = 0; f<F; f++) {
|
||||
a1 += b*a1;
|
||||
#if(UNROLL>1)
|
||||
a2 += b*a2;
|
||||
#endif
|
||||
#if(UNROLL>2)
|
||||
a3 += b*a3;
|
||||
#endif
|
||||
#if(UNROLL>3)
|
||||
a4 += b*a4;
|
||||
#endif
|
||||
#if(UNROLL>4)
|
||||
a5 += b*a5;
|
||||
#endif
|
||||
#if(UNROLL>5)
|
||||
a6 += b*a6;
|
||||
#endif
|
||||
#if(UNROLL>6)
|
||||
a7 += b*a7;
|
||||
#endif
|
||||
#if(UNROLL>7)
|
||||
a8 += b*a8;
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
#if(UNROLL==1)
|
||||
C(n,i,0) = a1;
|
||||
#endif
|
||||
#if(UNROLL==2)
|
||||
C(n,i,0) = a1+a2;
|
||||
#endif
|
||||
#if(UNROLL==3)
|
||||
C(n,i,0) = a1+a2+a3;
|
||||
#endif
|
||||
#if(UNROLL==4)
|
||||
C(n,i,0) = a1+a2+a3+a4;
|
||||
#endif
|
||||
#if(UNROLL==5)
|
||||
C(n,i,0) = a1+a2+a3+a4+a5;
|
||||
#endif
|
||||
#if(UNROLL==6)
|
||||
C(n,i,0) = a1+a2+a3+a4+a5+a6;
|
||||
#endif
|
||||
#if(UNROLL==7)
|
||||
C(n,i,0) = a1+a2+a3+a4+a5+a6+a7;
|
||||
#endif
|
||||
#if(UNROLL==8)
|
||||
C(n,i,0) = a1+a2+a3+a4+a5+a6+a7+a8;
|
||||
#endif
|
||||
|
||||
});
|
||||
}
|
||||
});
|
||||
Kokkos::fence();
|
||||
double seconds = timer.seconds();
|
||||
|
||||
double bytes = 1.0*N*K*R*3*sizeof(Scalar);
|
||||
double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1));
|
||||
printf("NKRUFTS: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf\n",N,K,R,UNROLL,F,T,S,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds);
|
||||
}
|
||||
};
|
||||
|
||||
96
lib/kokkos/benchmarks/bytes_and_flops/main.cpp
Normal file
96
lib/kokkos/benchmarks/bytes_and_flops/main.cpp
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include<Kokkos_Core.hpp>
|
||||
#include<impl/Kokkos_Timer.hpp>
|
||||
#include<bench.hpp>
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
Kokkos::initialize();
|
||||
|
||||
|
||||
if(argc<10) {
|
||||
printf("Arguments: N K R D U F T S\n");
|
||||
printf(" P: Precision (1==float, 2==double)\n");
|
||||
printf(" N,K: dimensions of the 2D array to allocate\n");
|
||||
printf(" R: how often to loop through the K dimension with each team\n");
|
||||
printf(" D: distance between loaded elements (stride)\n");
|
||||
printf(" U: how many independent flops to do per load\n");
|
||||
printf(" F: how many times to repeat the U unrolled operations before reading next element\n");
|
||||
printf(" T: team size\n");
|
||||
printf(" S: shared memory per team (used to control occupancy on GPUs)\n");
|
||||
printf("Example Input GPU:\n");
|
||||
printf(" Bandwidth Bound : 2 100000 1024 1 1 1 1 256 6000\n");
|
||||
printf(" Cache Bound : 2 100000 1024 64 1 1 1 512 20000\n");
|
||||
printf(" Compute Bound : 2 100000 1024 1 1 8 64 256 6000\n");
|
||||
printf(" Load Slots Used : 2 20000 256 32 16 1 1 256 6000\n");
|
||||
printf(" Inefficient Load: 2 20000 256 32 2 1 1 256 20000\n");
|
||||
Kokkos::finalize();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int P = atoi(argv[1]);
|
||||
int N = atoi(argv[2]);
|
||||
int K = atoi(argv[3]);
|
||||
int R = atoi(argv[4]);
|
||||
int D = atoi(argv[5]);
|
||||
int U = atoi(argv[6]);
|
||||
int F = atoi(argv[7]);
|
||||
int T = atoi(argv[8]);
|
||||
int S = atoi(argv[9]);
|
||||
|
||||
if(U>8) {printf("U must be 1-8\n"); return 0;}
|
||||
if( (D!=1) && (D!=2) && (D!=4) && (D!=8) && (D!=16) && (D!=32)) {printf("D must be one of 1,2,4,8,16,32\n"); return 0;}
|
||||
if( (P!=1) && (P!=2) ) {printf("P must be one of 1,2\n"); return 0;}
|
||||
|
||||
if(P==1) {
|
||||
run_stride_unroll<float>(N,K,R,D,U,F,T,S);
|
||||
}
|
||||
if(P==2) {
|
||||
run_stride_unroll<double>(N,K,R,D,U,F,T,S);
|
||||
}
|
||||
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
44
lib/kokkos/benchmarks/gather/Makefile
Normal file
44
lib/kokkos/benchmarks/gather/Makefile
Normal file
@ -0,0 +1,44 @@
|
||||
KOKKOS_PATH = ${HOME}/kokkos
|
||||
SRC = $(wildcard *.cpp)
|
||||
KOKKOS_DEVICES=Cuda
|
||||
KOKKOS_CUDA_OPTIONS=enable_lambda
|
||||
|
||||
default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
|
||||
EXE = gather.cuda
|
||||
KOKKOS_DEVICES = "Cuda,OpenMP"
|
||||
KOKKOS_ARCH = "SNB,Kepler35"
|
||||
else
|
||||
CXX = g++
|
||||
EXE = gather.host
|
||||
KOKKOS_DEVICES = "OpenMP"
|
||||
KOKKOS_ARCH = "SNB"
|
||||
endif
|
||||
|
||||
CXXFLAGS = -O3 -g
|
||||
|
||||
DEPFLAGS = -M
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
OBJ = $(SRC:.cpp=.o)
|
||||
LIB =
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
$(warning ${KOKKOS_CPPFLAGS})
|
||||
build: $(EXE)
|
||||
|
||||
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
|
||||
|
||||
clean: kokkos-clean
|
||||
rm -f *.o *.cuda *.host
|
||||
|
||||
# Compilation rules
|
||||
|
||||
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) gather_unroll.hpp gather.hpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
92
lib/kokkos/benchmarks/gather/gather.hpp
Normal file
92
lib/kokkos/benchmarks/gather/gather.hpp
Normal file
@ -0,0 +1,92 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// Primary template of the gather benchmark driver, parameterized on the
// scalar type and on a compile-time unroll factor. Only run() is declared
// here; no definition is given in this primary template.
// NOTE(review): presumably each supported UNROLL value gets its definition
// from a partial specialization provided elsewhere - confirm.
// run() takes the problem sizes N, K, D, R and F as plain ints and returns
// nothing.
template<class Scalar, int UNROLL>
|
||||
struct RunGather {
|
||||
static void run(int N, int K, int D, int R, int F);
|
||||
};
|
||||
|
||||
#define UNROLL 1
|
||||
#include<gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 2
|
||||
#include<gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 3
|
||||
#include<gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 4
|
||||
#include<gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 5
|
||||
#include<gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 6
|
||||
#include<gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 7
|
||||
#include<gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
#define UNROLL 8
|
||||
#include<gather_unroll.hpp>
|
||||
#undef UNROLL
|
||||
|
||||
template<class Scalar>
|
||||
void run_gather_test(int N, int K, int D, int R, int U, int F) {
|
||||
if(U == 1)
|
||||
RunGather<Scalar,1>::run(N,K,D,R,F);
|
||||
if(U == 2)
|
||||
RunGather<Scalar,2>::run(N,K,D,R,F);
|
||||
if(U == 3)
|
||||
RunGather<Scalar,3>::run(N,K,D,R,F);
|
||||
if(U == 4)
|
||||
RunGather<Scalar,4>::run(N,K,D,R,F);
|
||||
if(U == 5)
|
||||
RunGather<Scalar,5>::run(N,K,D,R,F);
|
||||
if(U == 6)
|
||||
RunGather<Scalar,6>::run(N,K,D,R,F);
|
||||
if(U == 7)
|
||||
RunGather<Scalar,7>::run(N,K,D,R,F);
|
||||
if(U == 8)
|
||||
RunGather<Scalar,8>::run(N,K,D,R,F);
|
||||
}
|
||||
169
lib/kokkos/benchmarks/gather/gather_unroll.hpp
Normal file
169
lib/kokkos/benchmarks/gather/gather_unroll.hpp
Normal file
@ -0,0 +1,169 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include<Kokkos_Core.hpp>
|
||||
#include<Kokkos_Random.hpp>
|
||||
|
||||
template<class Scalar>
|
||||
struct RunGather<Scalar,UNROLL> {
|
||||
static void run(int N, int K, int D, int R, int F) {
|
||||
Kokkos::View<int**> connectivity("Connectivity",N,K);
|
||||
Kokkos::View<Scalar*> A_in("Input",N);
|
||||
Kokkos::View<Scalar*> B_in("Input",N);
|
||||
Kokkos::View<Scalar*> C("Output",N);
|
||||
|
||||
Kokkos::Random_XorShift64_Pool<> rand_pool(12313);
|
||||
|
||||
Kokkos::deep_copy(A_in,1.5);
|
||||
Kokkos::deep_copy(B_in,2.0);
|
||||
|
||||
Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > A(A_in);
|
||||
Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > B(B_in);
|
||||
|
||||
Kokkos::parallel_for("InitKernel",N,
|
||||
KOKKOS_LAMBDA (const int& i) {
|
||||
auto rand_gen = rand_pool.get_state();
|
||||
for( int jj=0; jj<K; jj++) {
|
||||
connectivity(i,jj) = (rand_gen.rand(D) + i - D/2 + N)%N;
|
||||
}
|
||||
rand_pool.free_state(rand_gen);
|
||||
});
|
||||
Kokkos::fence();
|
||||
|
||||
|
||||
Kokkos::Timer timer;
|
||||
for(int r = 0; r<R; r++) {
|
||||
Kokkos::parallel_for("BenchmarkKernel",N,
|
||||
KOKKOS_LAMBDA (const int& i) {
|
||||
Scalar c = Scalar(0.0);
|
||||
for( int jj=0; jj<K; jj++) {
|
||||
const int j = connectivity(i,jj);
|
||||
Scalar a1 = A(j);
|
||||
const Scalar b = B(j);
|
||||
#if(UNROLL>1)
|
||||
Scalar a2 = a1*Scalar(1.3);
|
||||
#endif
|
||||
#if(UNROLL>2)
|
||||
Scalar a3 = a2*Scalar(1.1);
|
||||
#endif
|
||||
#if(UNROLL>3)
|
||||
Scalar a4 = a3*Scalar(1.1);
|
||||
#endif
|
||||
#if(UNROLL>4)
|
||||
Scalar a5 = a4*Scalar(1.3);
|
||||
#endif
|
||||
#if(UNROLL>5)
|
||||
Scalar a6 = a5*Scalar(1.1);
|
||||
#endif
|
||||
#if(UNROLL>6)
|
||||
Scalar a7 = a6*Scalar(1.1);
|
||||
#endif
|
||||
#if(UNROLL>7)
|
||||
Scalar a8 = a7*Scalar(1.1);
|
||||
#endif
|
||||
|
||||
|
||||
for(int f = 0; f<F; f++) {
|
||||
a1 += b*a1;
|
||||
#if(UNROLL>1)
|
||||
a2 += b*a2;
|
||||
#endif
|
||||
#if(UNROLL>2)
|
||||
a3 += b*a3;
|
||||
#endif
|
||||
#if(UNROLL>3)
|
||||
a4 += b*a4;
|
||||
#endif
|
||||
#if(UNROLL>4)
|
||||
a5 += b*a5;
|
||||
#endif
|
||||
#if(UNROLL>5)
|
||||
a6 += b*a6;
|
||||
#endif
|
||||
#if(UNROLL>6)
|
||||
a7 += b*a7;
|
||||
#endif
|
||||
#if(UNROLL>7)
|
||||
a8 += b*a8;
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
#if(UNROLL==1)
|
||||
c += a1;
|
||||
#endif
|
||||
#if(UNROLL==2)
|
||||
c += a1+a2;
|
||||
#endif
|
||||
#if(UNROLL==3)
|
||||
c += a1+a2+a3;
|
||||
#endif
|
||||
#if(UNROLL==4)
|
||||
c += a1+a2+a3+a4;
|
||||
#endif
|
||||
#if(UNROLL==5)
|
||||
c += a1+a2+a3+a4+a5;
|
||||
#endif
|
||||
#if(UNROLL==6)
|
||||
c += a1+a2+a3+a4+a5+a6;
|
||||
#endif
|
||||
#if(UNROLL==7)
|
||||
c += a1+a2+a3+a4+a5+a6+a7;
|
||||
#endif
|
||||
#if(UNROLL==8)
|
||||
c += a1+a2+a3+a4+a5+a6+a7+a8;
|
||||
#endif
|
||||
|
||||
}
|
||||
C(i) = c ;
|
||||
});
|
||||
Kokkos::fence();
|
||||
}
|
||||
double seconds = timer.seconds();
|
||||
|
||||
double bytes = 1.0*N*K*R*(2*sizeof(Scalar)+sizeof(int)) + 1.0*N*R*sizeof(Scalar);
|
||||
double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1));
|
||||
double gather_ops = 1.0*N*K*R*2;
|
||||
printf("SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf GGather/s: %lf\n",sizeof(Scalar)/4,N,K,D,R,UNROLL,F,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds,1.e-9*gather_ops/seconds);
|
||||
}
|
||||
};
|
||||
@ -1,13 +1,13 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
@ -36,73 +36,58 @@
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_HostSpace.hpp>
|
||||
#include<Kokkos_Core.hpp>
|
||||
#include<impl/Kokkos_Timer.hpp>
|
||||
#include<gather.hpp>
|
||||
|
||||
#include <impl/Kokkos_HBWAllocators.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
int main(int argc, char* argv[]) {
|
||||
Kokkos::initialize(argc,argv);
|
||||
|
||||
|
||||
#include <stdint.h> // uintptr_t
|
||||
#include <cstdlib> // for malloc, realloc, and free
|
||||
#include <cstring> // for memcpy
|
||||
|
||||
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
|
||||
#include <sys/mman.h> // for mmap, munmap, MAP_ANON, etc
|
||||
#include <unistd.h> // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES
|
||||
#endif
|
||||
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#include <memkind.h>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
void* HBWMallocAllocator::allocate( size_t size )
|
||||
{
|
||||
std::cout<< "Allocate HBW: " << 1.0e-6*size << "MB" << std::endl;
|
||||
void * ptr = NULL;
|
||||
if (size) {
|
||||
ptr = memkind_malloc(MEMKIND_TYPE,size);
|
||||
|
||||
if (!ptr)
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
msg << name() << ": allocate(" << size << ") FAILED";
|
||||
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||
}
|
||||
if(argc<8) {
|
||||
printf("Arguments: S N K D\n");
|
||||
printf(" S: Scalar Type Size (1==float, 2==double, 4=complex<double>)\n");
|
||||
printf(" N: Number of entities\n");
|
||||
printf(" K: Number of things to gather per entity\n");
|
||||
printf(" D: Max distance of gathered things of an entity\n");
|
||||
printf(" R: how often to loop through the K dimension with each team\n");
|
||||
printf(" U: how many independent flops to do per load\n");
|
||||
printf(" F: how many times to repeat the U unrolled operations before reading next element\n");
|
||||
printf("Example Input GPU:\n");
|
||||
printf(" Bandwidth Bound : 2 10000000 1 1 10 1 1\n");
|
||||
printf(" Cache Bound : 2 10000000 64 1 10 1 1\n");
|
||||
printf(" Cache Gather : 2 10000000 64 256 10 1 1\n");
|
||||
printf(" Global Gather : 2 100000000 16 100000000 1 1 1\n");
|
||||
printf(" Typical MD : 2 100000 32 512 1000 8 2\n");
|
||||
Kokkos::finalize();
|
||||
return 0;
|
||||
}
|
||||
return ptr;
|
||||
|
||||
|
||||
int S = atoi(argv[1]);
|
||||
int N = atoi(argv[2]);
|
||||
int K = atoi(argv[3]);
|
||||
int D = atoi(argv[4]);
|
||||
int R = atoi(argv[5]);
|
||||
int U = atoi(argv[6]);
|
||||
int F = atoi(argv[7]);
|
||||
|
||||
if( (S!=1) && (S!=2) && (S!=4)) {printf("S must be one of 1,2,4\n"); return 0;}
|
||||
if( N<D ) {printf("N must be larger or equal to D\n"); return 0; }
|
||||
if(S==1) {
|
||||
run_gather_test<float>(N,K,D,R,U,F);
|
||||
}
|
||||
if(S==2) {
|
||||
run_gather_test<double>(N,K,D,R,U,F);
|
||||
}
|
||||
if(S==4) {
|
||||
run_gather_test<Kokkos::complex<double> >(N,K,D,R,U,F);
|
||||
}
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
void HBWMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
|
||||
{
|
||||
if (ptr) {
|
||||
memkind_free(MEMKIND_TYPE,ptr);
|
||||
}
|
||||
}
|
||||
|
||||
void * HBWMallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size)
|
||||
{
|
||||
void * ptr = memkind_realloc(MEMKIND_TYPE, old_ptr, new_size);
|
||||
|
||||
if (new_size > 0u && ptr == NULL) {
|
||||
Kokkos::Impl::throw_runtime_exception("Error: Malloc Allocator could not reallocate memory");
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
#endif
|
||||
284
lib/kokkos/bin/nvcc_wrapper
Executable file
284
lib/kokkos/bin/nvcc_wrapper
Executable file
@ -0,0 +1,284 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# This shell script (nvcc_wrapper) wraps both the host compiler and
|
||||
# NVCC, if you are building legacy C or C++ code with CUDA enabled.
|
||||
# The script remedies some differences between the interface of NVCC
|
||||
# and that of the host compiler, in particular for linking.
|
||||
# It also means that a legacy code doesn't need separate .cu files;
|
||||
# it can just use .cpp files.
|
||||
#
|
||||
# Default settings: change those according to your machine. For
|
||||
# example, you may have two different wrappers with either icpc
|
||||
# or g++ as their back-end compiler. The defaults can be overwritten
|
||||
# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
|
||||
|
||||
default_arch="sm_35"
|
||||
#default_arch="sm_50"
|
||||
|
||||
#
|
||||
# The default C++ compiler.
|
||||
#
|
||||
host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
|
||||
#host_compiler="icpc"
|
||||
#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
|
||||
#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
|
||||
|
||||
#
|
||||
# Internal variables
|
||||
#
|
||||
|
||||
# C++ files
|
||||
cpp_files=""
|
||||
|
||||
# Host compiler arguments
|
||||
xcompiler_args=""
|
||||
|
||||
# Cuda (NVCC) only arguments
|
||||
cuda_args=""
|
||||
|
||||
# Arguments for both NVCC and Host compiler
|
||||
shared_args=""
|
||||
|
||||
# Linker arguments
|
||||
xlinker_args=""
|
||||
|
||||
# Object files passable to NVCC
|
||||
object_files=""
|
||||
|
||||
# Link objects for the host linker only
|
||||
object_files_xlinker=""
|
||||
|
||||
# Shared libraries with version numbers are not handled correctly by NVCC
|
||||
shared_versioned_libraries_host=""
|
||||
shared_versioned_libraries=""
|
||||
|
||||
# Does the User set the architecture
|
||||
arch_set=0
|
||||
|
||||
# Does the user overwrite the host compiler
|
||||
ccbin_set=0
|
||||
|
||||
#Error code of compilation
|
||||
error_code=0
|
||||
|
||||
# Do a dry run without actually compiling
|
||||
dry_run=0
|
||||
|
||||
# Skip NVCC compilation and use host compiler directly
|
||||
host_only=0
|
||||
|
||||
# Enable workaround for CUDA 6.5 for pragma ident
|
||||
replace_pragma_ident=0
|
||||
|
||||
# Mark first host compiler argument
|
||||
first_xcompiler_arg=1
|
||||
|
||||
temp_dir=${TMPDIR:-/tmp}
|
||||
|
||||
# Check if we have an optimization argument already
|
||||
optimization_applied=0
|
||||
|
||||
#echo "Arguments: $# $@"
|
||||
|
||||
while [ $# -gt 0 ]
|
||||
do
|
||||
case $1 in
|
||||
#show the executed command
|
||||
--show|--nvcc-wrapper-show)
|
||||
dry_run=1
|
||||
;;
|
||||
#run host compilation only
|
||||
--host-only)
|
||||
host_only=1
|
||||
;;
|
||||
#replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros
|
||||
--replace-pragma-ident)
|
||||
replace_pragma_ident=1
|
||||
;;
|
||||
#handle source files to be compiled as cuda files
|
||||
*.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
|
||||
cpp_files="$cpp_files $1"
|
||||
;;
|
||||
# Ensure we only have one optimization flag because NVCC doesn't allow multiple
|
||||
-O*)
|
||||
if [ $optimization_applied -eq 1 ]; then
|
||||
echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting."
|
||||
else
|
||||
shared_args="$shared_args $1"
|
||||
optimization_applied=1
|
||||
fi
|
||||
;;
|
||||
#Handle shared args (valid for both nvcc and the host compiler)
|
||||
-D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
|
||||
shared_args="$shared_args $1"
|
||||
;;
|
||||
#Handle shared args that have an argument
|
||||
-o|-MT)
|
||||
shared_args="$shared_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
#Handle known nvcc args
|
||||
-gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle more known nvcc args
|
||||
--expt-extended-lambda|--expt-relaxed-constexpr)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle known nvcc args that have an argument
|
||||
-rdc|-maxrregcount|--default-stream)
|
||||
cuda_args="$cuda_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
#Handle c++11 setting
|
||||
--std=c++11|-std=c++11)
|
||||
shared_args="$shared_args $1"
|
||||
;;
|
||||
#strip off -std=c++98 due to nvcc warnings; Tribits will place both -std=c++11 and -std=c++98
|
||||
-std=c++98|--std=c++98)
|
||||
;;
|
||||
#strip off pedantic because it produces endless warnings about #LINE added by the preprocessor
|
||||
-pedantic|-Wpedantic|-ansi)
|
||||
;;
|
||||
#strip -Xcompiler because we add it
|
||||
-Xcompiler)
|
||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
||||
xcompiler_args="$2"
|
||||
first_xcompiler_arg=0
|
||||
else
|
||||
xcompiler_args="$xcompiler_args,$2"
|
||||
fi
|
||||
shift
|
||||
;;
|
||||
#strip off "-x cu" because we add that
|
||||
-x)
|
||||
if [[ $2 != "cu" ]]; then
|
||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
||||
xcompiler_args="-x,$2"
|
||||
first_xcompiler_arg=0
|
||||
else
|
||||
xcompiler_args="$xcompiler_args,-x,$2"
|
||||
fi
|
||||
fi
|
||||
shift
|
||||
;;
|
||||
#Handle -ccbin (if it's not set we can set it to a default value)
|
||||
-ccbin)
|
||||
cuda_args="$cuda_args $1 $2"
|
||||
ccbin_set=1
|
||||
host_compiler=$2
|
||||
shift
|
||||
;;
|
||||
#Handle -arch argument (if it's not set use a default)
|
||||
-arch*)
|
||||
cuda_args="$cuda_args $1"
|
||||
arch_set=1
|
||||
;;
|
||||
#Handle -Xcudafe argument
|
||||
-Xcudafe)
|
||||
cuda_args="$cuda_args -Xcudafe $2"
|
||||
shift
|
||||
;;
|
||||
#Handle args that should be sent to the linker
|
||||
-Wl*)
|
||||
xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
|
||||
host_linker_args="$host_linker_args ${1:4:${#1}}"
|
||||
;;
|
||||
#Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
|
||||
*.a|*.so|*.o|*.obj)
|
||||
object_files="$object_files $1"
|
||||
object_files_xlinker="$object_files_xlinker -Xlinker $1"
|
||||
;;
|
||||
#Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
|
||||
*.dylib)
|
||||
object_files="$object_files -Xlinker $1"
|
||||
object_files_xlinker="$object_files_xlinker -Xlinker $1"
|
||||
;;
|
||||
#Handle shared libraries with *.so.* names which nvcc can't do.
|
||||
*.so.*)
|
||||
shared_versioned_libraries_host="$shared_versioned_libraries_host $1"
|
||||
shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1"
|
||||
;;
|
||||
#All other args are sent to the host compiler
|
||||
*)
|
||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
||||
xcompiler_args=$1
|
||||
first_xcompiler_arg=0
|
||||
else
|
||||
xcompiler_args="$xcompiler_args,$1"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
shift
|
||||
done
|
||||
|
||||
#Add default host compiler if necessary
|
||||
if [ $ccbin_set -ne 1 ]; then
|
||||
cuda_args="$cuda_args -ccbin $host_compiler"
|
||||
fi
|
||||
|
||||
#Add architecture command
|
||||
if [ $arch_set -ne 1 ]; then
|
||||
cuda_args="$cuda_args -arch=$default_arch"
|
||||
fi
|
||||
|
||||
#Compose compilation command
|
||||
nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
|
||||
if [ $first_xcompiler_arg -eq 0 ]; then
|
||||
nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
|
||||
fi
|
||||
|
||||
#Compose host only command
|
||||
host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
|
||||
|
||||
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
|
||||
if [ $replace_pragma_ident -eq 1 ]; then
|
||||
cpp_files2=""
|
||||
for file in $cpp_files
|
||||
do
|
||||
var=`grep pragma ${file} | grep ident | grep "#"`
|
||||
if [ "${#var}" -gt 0 ]
|
||||
then
|
||||
sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file
|
||||
cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file"
|
||||
else
|
||||
cpp_files2="$cpp_files2 $file"
|
||||
fi
|
||||
done
|
||||
cpp_files=$cpp_files2
|
||||
#echo $cpp_files
|
||||
fi
|
||||
|
||||
if [ "$cpp_files" ]; then
|
||||
nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files"
|
||||
else
|
||||
nvcc_command="$nvcc_command $object_files"
|
||||
fi
|
||||
|
||||
if [ "$cpp_files" ]; then
|
||||
host_command="$host_command $object_files $cpp_files"
|
||||
else
|
||||
host_command="$host_command $object_files"
|
||||
fi
|
||||
|
||||
#Print command for dryrun
|
||||
if [ $dry_run -eq 1 ]; then
|
||||
if [ $host_only -eq 1 ]; then
|
||||
echo $host_command
|
||||
else
|
||||
echo $nvcc_command
|
||||
fi
|
||||
exit 0
|
||||
fi
|
||||
|
||||
#Run compilation command
|
||||
if [ $host_only -eq 1 ]; then
|
||||
$host_command
|
||||
else
|
||||
$nvcc_command
|
||||
fi
|
||||
error_code=$?
|
||||
|
||||
#Report error code
|
||||
exit $error_code
|
||||
@ -53,12 +53,12 @@
|
||||
# ************************************************************************
|
||||
# @HEADER
|
||||
|
||||
include(${TRIBITS_DEPS_DIR}/CUDA.cmake)
|
||||
#include(${TRIBITS_DEPS_DIR}/CUDA.cmake)
|
||||
|
||||
IF (TPL_ENABLE_CUDA)
|
||||
GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
|
||||
GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
|
||||
GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
|
||||
TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
|
||||
ENDIF()
|
||||
#IF (TPL_ENABLE_CUDA)
|
||||
# GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
|
||||
# GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
|
||||
# GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
|
||||
# TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
|
||||
#ENDIF()
|
||||
|
||||
|
||||
@ -1,6 +1,16 @@
|
||||
INCLUDE(CMakeParseArguments)
|
||||
INCLUDE(CTest)
|
||||
|
||||
cmake_policy(SET CMP0054 NEW)
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME})
|
||||
project(Kokkos)
|
||||
ENDIF()
|
||||
|
||||
# Default <project>_ENABLE_DEBUG to OFF only when the caller has not already
# set it. DEFINED expects a variable *name*, so the name is built from
# ${PROJECT_NAME} without dereferencing the variable itself. Testing the
# dereferenced value (plus a stray "}") never matched the real variable and
# therefore always overwrote a user-supplied setting with OFF.
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG)
  SET(${PROJECT_NAME}_ENABLE_DEBUG OFF)
ENDIF()
|
||||
|
||||
FUNCTION(ASSERT_DEFINED VARS)
|
||||
FOREACH(VAR ${VARS})
|
||||
IF(NOT DEFINED ${VAR})
|
||||
@ -75,6 +85,13 @@ MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES)
|
||||
|
||||
ENDMACRO()
|
||||
|
||||
|
||||
# Function named INCLUDE_DIRECTORIES: parses the option keyword
# REQUIRED_DURING_INSTALLATION_TESTING out of the argument list and passes
# every remaining (unparsed) argument on to _INCLUDE_DIRECTORIES.
# NOTE(review): presumably this overrides CMake's built-in
# include_directories() so calls carrying that keyword keep working - confirm.
function(INCLUDE_DIRECTORIES)
|
||||
cmake_parse_arguments(INCLUDE_DIRECTORIES "REQUIRED_DURING_INSTALLATION_TESTING" "" "" ${ARGN})
|
||||
_INCLUDE_DIRECTORIES(${INCLUDE_DIRECTORIES_UNPARSED_ARGUMENTS})
|
||||
endfunction()
|
||||
|
||||
|
||||
MACRO(TARGET_TRANSFER_PROPERTY TARGET_NAME PROP_IN PROP_OUT)
|
||||
SET(PROP_VALUES)
|
||||
FOREACH(TARGET_X ${ARGN})
|
||||
@ -271,6 +288,11 @@ ENDFUNCTION()
|
||||
|
||||
ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR})
|
||||
|
||||
# Empty stub: has no body, so calling it does nothing.
# NOTE(review): presumably kept so call sites using the TriBITS name still
# configure in this build - confirm.
FUNCTION(TRIBITS_ADD_TEST)
|
||||
ENDFUNCTION()
|
||||
# Empty stub: has no body, so calling it does nothing.
# NOTE(review): presumably kept so call sites using the TriBITS name still
# configure in this build - confirm.
FUNCTION(TRIBITS_TPL_TENTATIVELY_ENABLE)
|
||||
ENDFUNCTION()
|
||||
|
||||
FUNCTION(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME)
|
||||
|
||||
SET(options STANDARD_PASS_OUTPUT WILL_FAIL)
|
||||
|
||||
0
lib/kokkos/config/configure_compton_cpu.sh
Executable file → Normal file
0
lib/kokkos/config/configure_compton_cpu.sh
Executable file → Normal file
0
lib/kokkos/config/configure_compton_mic.sh
Executable file → Normal file
0
lib/kokkos/config/configure_compton_mic.sh
Executable file → Normal file
0
lib/kokkos/config/configure_kokkos.sh
Executable file → Normal file
0
lib/kokkos/config/configure_kokkos.sh
Executable file → Normal file
0
lib/kokkos/config/configure_kokkos_nvidia.sh
Executable file → Normal file
0
lib/kokkos/config/configure_kokkos_nvidia.sh
Executable file → Normal file
0
lib/kokkos/config/configure_shannon.sh
Executable file → Normal file
0
lib/kokkos/config/configure_shannon.sh
Executable file → Normal file
@ -91,9 +91,20 @@ Step 3:
|
||||
|
||||
// -------------------------------------------------------------------------------- //
|
||||
|
||||
Step 4:
|
||||
4.1. Once all Trilinos tests pass promote Kokkos develop branch to master on Github
|
||||
Step 4: Once all Trilinos tests pass promote Kokkos develop branch to master on Github
|
||||
4.1. Generate Changelog (You need a github API token)
|
||||
|
||||
Close all Open issues with "InDevelop" tag on github
|
||||
|
||||
(Not from kokkos directory)
|
||||
github_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG'
|
||||
|
||||
(Copy the new section from the generated CHANGELOG.md to the kokkos/CHANGELOG.md)
|
||||
(Make desired changes to CHANGELOG.md to enhance clarity)
|
||||
(Commit and push the CHANGELOG to develop)
|
||||
|
||||
4.2 Merge develop into Master
|
||||
|
||||
- DO NOT fast-forward the merge!!!!
|
||||
|
||||
(From kokkos directory):
|
||||
@ -103,7 +114,7 @@ Step 4:
|
||||
git reset --hard origin/master
|
||||
git merge --no-ff origin/develop
|
||||
|
||||
4.2. Update the tag in kokkos/config/master_history.txt
|
||||
4.3. Update the tag in kokkos/config/master_history.txt
|
||||
Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate
|
||||
Tag format: #.#.##
|
||||
|
||||
|
||||
@ -1,3 +1,6 @@
|
||||
tag: 2.01.00 date: 07:21:2016 master: xxxxxxxx develop: fa6dfcc4
|
||||
tag: 2.01.06 date: 09:02:2016 master: 9afaa87f develop: 555f1a3a
|
||||
|
||||
tag: 2.01.10 date: 09:27:2016 master: e4119325 develop: e6cda11e
|
||||
tag: 2.02.00 date: 10:30:2016 master: 6c90a581 develop: ca3dd56e
|
||||
tag: 2.02.01 date: 11:01:2016 master: 9c698c86 develop: b0072304
|
||||
tag: 2.02.07 date: 12:16:2016 master: 4b4cc4ba develop: 382c0966
|
||||
|
||||
@ -121,6 +121,10 @@ do
|
||||
-gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle more known nvcc args
|
||||
--expt-extended-lambda|--expt-relaxed-constexpr)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle known nvcc args that have an argument
|
||||
-rdc|-maxrregcount|--default-stream)
|
||||
cuda_args="$cuda_args $1 $2"
|
||||
|
||||
@ -16,6 +16,8 @@ elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then
|
||||
MACHINE=bowman
|
||||
elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
|
||||
MACHINE=shepard
|
||||
elif [[ "$HOSTNAME" =~ apollo ]]; then
|
||||
MACHINE=apollo
|
||||
elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
|
||||
MACHINE=sems
|
||||
else
|
||||
@ -28,6 +30,7 @@ IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
|
||||
CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
|
||||
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
|
||||
CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial"
|
||||
|
||||
GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
|
||||
IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
@ -44,102 +47,12 @@ BUILD_ONLY=False
|
||||
declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3
|
||||
TEST_SCRIPT=False
|
||||
SKIP_HWLOC=False
|
||||
SPOT_CHECK=False
|
||||
|
||||
ARCH_FLAG=""
|
||||
PRINT_HELP=False
|
||||
OPT_FLAG=""
|
||||
KOKKOS_OPTIONS=""
|
||||
|
||||
#
|
||||
# Machine specific config
|
||||
#
|
||||
|
||||
if [ "$MACHINE" = "sems" ]; then
|
||||
source /projects/modulefiles/utils/sems-modules-init.sh
|
||||
source /projects/modulefiles/utils/kokkos-modules-init.sh
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>/base,hwloc/1.10.1/<COMPILER_NAME>/<COMPILER_VERSION>/base"
|
||||
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.7.2/base"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
|
||||
elif [ "$MACHINE" = "white" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=32
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
|
||||
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.9.2"
|
||||
|
||||
# Don't do pthread on white
|
||||
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/4.9.2 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.3.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
|
||||
)
|
||||
|
||||
ARCH_FLAG="--arch=Power8"
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=8
|
||||
|
||||
elif [ "$MACHINE" = "bowman" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=32
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
|
||||
|
||||
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
)
|
||||
|
||||
ARCH_FLAG="--arch=KNL"
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=8
|
||||
|
||||
elif [ "$MACHINE" = "shepard" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=32
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
|
||||
|
||||
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
)
|
||||
|
||||
ARCH_FLAG="--arch=HSW"
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=8
|
||||
|
||||
else
|
||||
echo "Unhandled machine $MACHINE" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
declare -i NUM_RESULTS_TO_KEEP=7
|
||||
|
||||
RESULT_ROOT_PREFIX=TestAll
|
||||
|
||||
SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
|
||||
|
||||
#
|
||||
# Handle arguments
|
||||
@ -173,7 +86,211 @@ NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}"
|
||||
--dry-run*)
|
||||
DRYRUN=True
|
||||
;;
|
||||
--help)
|
||||
--spot-check*)
|
||||
SPOT_CHECK=True
|
||||
;;
|
||||
--arch*)
|
||||
ARCH_FLAG="--arch=${key#*=}"
|
||||
;;
|
||||
--opt-flag*)
|
||||
OPT_FLAG="${key#*=}"
|
||||
;;
|
||||
--with-cuda-options*)
|
||||
KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}"
|
||||
;;
|
||||
--help*)
|
||||
PRINT_HELP=True
|
||||
;;
|
||||
*)
|
||||
# args, just append
|
||||
ARGS="$ARGS $1"
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
|
||||
|
||||
# set kokkos path
|
||||
if [ -z "$KOKKOS_PATH" ]; then
|
||||
KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
|
||||
else
|
||||
# Ensure KOKKOS_PATH is abs path
|
||||
KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
|
||||
fi
|
||||
|
||||
#
|
||||
# Machine specific config
|
||||
#
|
||||
|
||||
if [ "$MACHINE" = "sems" ]; then
|
||||
source /projects/sems/modulefiles/utils/sems-modules-init.sh
|
||||
|
||||
BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
|
||||
CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
|
||||
CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG=""
|
||||
fi
|
||||
|
||||
if [ "$SPOT_CHECK" = "True" ]; then
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
|
||||
"cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
else
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
fi
|
||||
|
||||
elif [ "$MACHINE" = "white" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=32
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
|
||||
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0"
|
||||
|
||||
# Don't do pthread on white
|
||||
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
|
||||
"cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=Power8,Kepler37"
|
||||
fi
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
|
||||
elif [ "$MACHINE" = "bowman" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=32
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
|
||||
|
||||
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
)
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=KNL"
|
||||
fi
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
|
||||
elif [ "$MACHINE" = "shepard" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=32
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
|
||||
|
||||
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
)
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=HSW"
|
||||
fi
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
|
||||
elif [ "$MACHINE" = "apollo" ]; then
|
||||
source /projects/sems/modulefiles/utils/sems-modules-init.sh
|
||||
module use /home/projects/modulefiles/local/x86-64
|
||||
module load kokkos-env
|
||||
|
||||
module load sems-git
|
||||
module load sems-tex
|
||||
module load sems-cmake/3.5.2
|
||||
module load sems-gdb
|
||||
|
||||
SKIP_HWLOC=True
|
||||
|
||||
BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
|
||||
CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
|
||||
CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
|
||||
|
||||
CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/8.0.44"
|
||||
NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0"
|
||||
|
||||
BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP"
|
||||
BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread"
|
||||
BUILD_LIST_CLANG="Serial,Pthread,OpenMP"
|
||||
|
||||
if [ "$SPOT_CHECK" = "True" ]; then
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/head $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS"
|
||||
"cuda/8.0.44 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
else
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"clang/head $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
|
||||
"clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
|
||||
"gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
fi
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=SNB,Kepler35"
|
||||
fi
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
else
|
||||
echo "Unhandled machine $MACHINE" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
declare -i NUM_RESULTS_TO_KEEP=7
|
||||
|
||||
RESULT_ROOT_PREFIX=TestAll
|
||||
|
||||
if [ "$PRINT_HELP" = "True" ]; then
|
||||
echo "test_all_sandia <ARGS> <OPTIONS>:"
|
||||
echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory"
|
||||
echo " Defaults to root repo containing this script"
|
||||
@ -183,6 +300,9 @@ echo "--skip-hwloc: Do not do hwloc tests"
|
||||
echo "--num=N: Number of jobs to run in parallel "
|
||||
echo "--dry-run: Just print what would be executed"
|
||||
echo "--build-only: Just do builds, don't run anything"
|
||||
echo "--opt-flag=FLAG: Optimization flag (default: -O3)"
|
||||
echo "--arch=ARCHITECTURE: overwrite architecture flags"
|
||||
echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS"
|
||||
echo "--build-list=BUILD,BUILD,BUILD..."
|
||||
echo " Provide a comma-separated list of builds instead of running all builds"
|
||||
echo " Valid items:"
|
||||
@ -220,21 +340,6 @@ echo " hit ctrl-z"
|
||||
echo " % kill -9 %1"
|
||||
echo
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
# args, just append
|
||||
ARGS="$ARGS $1"
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
# set kokkos path
|
||||
if [ -z "$KOKKOS_PATH" ]; then
|
||||
KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
|
||||
else
|
||||
# Ensure KOKKOS_PATH is abs path
|
||||
KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
|
||||
fi
|
||||
|
||||
# set build type
|
||||
@ -381,11 +486,15 @@ single_build_and_test() {
|
||||
local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info)))
|
||||
fi
|
||||
|
||||
if [[ "$OPT_FLAG" = "" ]]; then
|
||||
OPT_FLAG="-O3"
|
||||
fi
|
||||
|
||||
if [[ "$build_type" = *debug* ]]; then
|
||||
local extra_args="$extra_args --debug"
|
||||
local cxxflags="-g $compiler_warning_flags"
|
||||
else
|
||||
local cxxflags="-O3 $compiler_warning_flags"
|
||||
local cxxflags="$OPT_FLAG $compiler_warning_flags"
|
||||
fi
|
||||
|
||||
if [[ "$compiler" == cuda* ]]; then
|
||||
@ -393,7 +502,9 @@ single_build_and_test() {
|
||||
export TMPDIR=$(pwd)
|
||||
fi
|
||||
|
||||
# cxxflags="-DKOKKOS_USING_EXP_VIEW=1 $cxxflags"
|
||||
if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then
|
||||
local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS"
|
||||
fi
|
||||
|
||||
echo " Starting job $desc"
|
||||
|
||||
@ -440,13 +551,14 @@ run_in_background() {
|
||||
local compiler=$1
|
||||
|
||||
local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL
|
||||
if [[ "$BUILD_ONLY" == True ]]; then
|
||||
num_jobs=8
|
||||
else
|
||||
# don't override command line input
|
||||
# if [[ "$BUILD_ONLY" == True ]]; then
|
||||
# num_jobs=8
|
||||
# else
|
||||
if [[ "$compiler" == cuda* ]]; then
|
||||
num_jobs=1
|
||||
fi
|
||||
fi
|
||||
# fi
|
||||
wait_for_jobs $num_jobs
|
||||
|
||||
single_build_and_test $* &
|
||||
|
||||
50
lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh
Executable file
50
lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh
Executable file
@ -0,0 +1,50 @@
|
||||
#!/bin/bash -le
|
||||
|
||||
export TRILINOS_UPDATED_PATH=${PWD}/trilinos-update
|
||||
export TRILINOS_PRISTINE_PATH=${PWD}/trilinos-pristine
|
||||
|
||||
#rm -rf ${KOKKOS_PATH}
|
||||
#rm -rf ${TRILINOS_UPDATED_PATH}
|
||||
#rm -rf ${TRILINOS_PRISTINE_PATH}
|
||||
|
||||
#Already done:
|
||||
if [ ! -d "${TRILINOS_UPDATED_PATH}" ]; then
|
||||
git clone https://github.com/trilinos/trilinos ${TRILINOS_UPDATED_PATH}
|
||||
fi
|
||||
if [ ! -d "${TRILINOS_PRISTINE_PATH}" ]; then
|
||||
git clone https://github.com/trilinos/trilinos ${TRILINOS_PRISTINE_PATH}
|
||||
fi
|
||||
|
||||
cd ${TRILINOS_UPDATED_PATH}
|
||||
git checkout develop
|
||||
git reset --hard origin/develop
|
||||
git pull
|
||||
cd ..
|
||||
|
||||
python kokkos/config/snapshot.py ${KOKKOS_PATH} ${TRILINOS_UPDATED_PATH}/packages
|
||||
|
||||
cd ${TRILINOS_UPDATED_PATH}
|
||||
echo ""
|
||||
echo ""
|
||||
echo "Trilinos State:"
|
||||
git log --pretty=oneline --since=2.days
|
||||
SHA=`git log --pretty=oneline --since=2.days | head -n 2 | tail -n 1 | awk '{print $1}'`
|
||||
cd ..
|
||||
|
||||
cd ${TRILINOS_PRISTINE_PATH}
|
||||
git status
|
||||
git log --pretty=oneline --since=2.days
|
||||
echo "Checkout develop"
|
||||
git checkout develop
|
||||
echo "Pull"
|
||||
git pull
|
||||
echo "Checkout SHA"
|
||||
git checkout ${SHA}
|
||||
cd ..
|
||||
|
||||
cd ${TRILINOS_PRISTINE_PATH}
|
||||
echo ""
|
||||
echo ""
|
||||
echo "Trilinos Pristine State:"
|
||||
git log --pretty=oneline --since=2.days
|
||||
cd ..
|
||||
@ -1,6 +1,6 @@
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
|
||||
|
||||
SET(SOURCES
|
||||
|
||||
@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests
|
||||
default: build_all
|
||||
echo "End Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
|
||||
else
|
||||
CXX = g++
|
||||
endif
|
||||
|
||||
CXXFLAGS = -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
CXX = $(NVCC_WRAPPER)
|
||||
CXXFLAGS ?= -O3
|
||||
LINK = $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
else
|
||||
CXX ?= g++
|
||||
CXXFLAGS ?= -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
endif
|
||||
|
||||
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests
|
||||
|
||||
TEST_TARGETS =
|
||||
|
||||
@ -83,7 +83,7 @@ TEST_F( cuda, dynrankview_perf )
|
||||
{
|
||||
std::cout << "Cuda" << std::endl;
|
||||
std::cout << " DynRankView vs View: Initialization Only " << std::endl;
|
||||
test_dynrankview_op_perf<Kokkos::Cuda>( 4096 );
|
||||
test_dynrankview_op_perf<Kokkos::Cuda>( 40960 );
|
||||
}
|
||||
|
||||
TEST_F( cuda, global_2_local)
|
||||
|
||||
@ -180,8 +180,8 @@ void test_dynrankview_op_perf( const int par_size )
|
||||
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
const size_type dim2 = 900;
|
||||
const size_type dim3 = 300;
|
||||
const size_type dim2 = 90;
|
||||
const size_type dim3 = 30;
|
||||
|
||||
double elapsed_time_view = 0;
|
||||
double elapsed_time_compview = 0;
|
||||
|
||||
@ -261,9 +261,6 @@ public:
|
||||
modified_device (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_device")),
|
||||
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
|
||||
{
|
||||
#if ! KOKKOS_USING_EXP_VIEW
|
||||
Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ());
|
||||
#else
|
||||
if ( int(d_view.rank) != int(h_view.rank) ||
|
||||
d_view.dimension_0() != h_view.dimension_0() ||
|
||||
d_view.dimension_1() != h_view.dimension_1() ||
|
||||
@ -284,7 +281,6 @@ public:
|
||||
d_view.span() != h_view.span() ) {
|
||||
Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
//@}
|
||||
@ -315,13 +311,13 @@ public:
|
||||
template< class Device >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const typename Impl::if_c<
|
||||
Impl::is_same<typename t_dev::memory_space,
|
||||
std::is_same<typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value,
|
||||
t_dev,
|
||||
t_host>::type& view () const
|
||||
{
|
||||
return Impl::if_c<
|
||||
Impl::is_same<
|
||||
std::is_same<
|
||||
typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value,
|
||||
t_dev,
|
||||
@ -347,13 +343,13 @@ public:
|
||||
/// appropriate template parameter.
|
||||
template<class Device>
|
||||
void sync( const typename Impl::enable_if<
|
||||
( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) ||
|
||||
( Impl::is_same< Device , int>::value)
|
||||
( std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) ||
|
||||
( std::is_same< Device , int>::value)
|
||||
, int >::type& = 0)
|
||||
{
|
||||
const unsigned int dev =
|
||||
Impl::if_c<
|
||||
Impl::is_same<
|
||||
std::is_same<
|
||||
typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value ,
|
||||
unsigned int,
|
||||
@ -370,7 +366,7 @@ public:
|
||||
modified_host() = modified_device() = 0;
|
||||
}
|
||||
}
|
||||
if(Impl::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) {
|
||||
if(std::is_same<typename t_host::memory_space,typename t_dev::memory_space>::value) {
|
||||
t_dev::execution_space::fence();
|
||||
t_host::execution_space::fence();
|
||||
}
|
||||
@ -378,13 +374,13 @@ public:
|
||||
|
||||
template<class Device>
|
||||
void sync ( const typename Impl::enable_if<
|
||||
( ! Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) ||
|
||||
( Impl::is_same< Device , int>::value)
|
||||
( ! std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) ||
|
||||
( std::is_same< Device , int>::value)
|
||||
, int >::type& = 0 )
|
||||
{
|
||||
const unsigned int dev =
|
||||
Impl::if_c<
|
||||
Impl::is_same<
|
||||
std::is_same<
|
||||
typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value,
|
||||
unsigned int,
|
||||
@ -405,7 +401,7 @@ public:
|
||||
{
|
||||
const unsigned int dev =
|
||||
Impl::if_c<
|
||||
Impl::is_same<
|
||||
std::is_same<
|
||||
typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value ,
|
||||
unsigned int,
|
||||
@ -431,7 +427,7 @@ public:
|
||||
void modify () {
|
||||
const unsigned int dev =
|
||||
Impl::if_c<
|
||||
Impl::is_same<
|
||||
std::is_same<
|
||||
typename t_dev::memory_space,
|
||||
typename Device::memory_space>::value,
|
||||
unsigned int,
|
||||
@ -514,11 +510,7 @@ public:
|
||||
|
||||
//! The allocation size (same as Kokkos::View::capacity).
|
||||
size_t capacity() const {
|
||||
#if KOKKOS_USING_EXP_VIEW
|
||||
return d_view.span();
|
||||
#else
|
||||
return d_view.capacity();
|
||||
#endif
|
||||
}
|
||||
|
||||
//! Get stride(s) for each dimension.
|
||||
@ -555,8 +547,6 @@ public:
|
||||
// Partial specializations of Kokkos::subview() for DualView objects.
|
||||
//
|
||||
|
||||
#if KOKKOS_USING_EXP_VIEW
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
@ -590,352 +580,6 @@ subview( const DualView<D,A1,A2,A3> & src , Args ... args )
|
||||
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#else
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
//
|
||||
// Partial specializations of Kokkos::subview() for DualView objects.
|
||||
//
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type
|
||||
, class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type
|
||||
, class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type
|
||||
>
|
||||
struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type >
|
||||
, SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type
|
||||
, SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type >
|
||||
{
|
||||
private:
|
||||
|
||||
typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > SrcViewType ;
|
||||
|
||||
enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 };
|
||||
enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 };
|
||||
enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 };
|
||||
enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 };
|
||||
enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 };
|
||||
enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 };
|
||||
enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 };
|
||||
enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 };
|
||||
|
||||
// The source view rank must be equal to the input argument rank
|
||||
// Once a void argument is encountered all subsequent arguments must be void.
|
||||
enum { InputRank =
|
||||
Impl::StaticAssert<( SrcViewType::rank ==
|
||||
( V0 ? 0 : (
|
||||
V1 ? 1 : (
|
||||
V2 ? 2 : (
|
||||
V3 ? 3 : (
|
||||
V4 ? 4 : (
|
||||
V5 ? 5 : (
|
||||
V6 ? 6 : (
|
||||
V7 ? 7 : 8 ))))))) ))
|
||||
&&
|
||||
( SrcViewType::rank ==
|
||||
( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) )
|
||||
>::value ? SrcViewType::rank : 0 };
|
||||
|
||||
enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 1 : 0 };
|
||||
enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 };
|
||||
enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 };
|
||||
enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 };
|
||||
enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 };
|
||||
enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 };
|
||||
enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 };
|
||||
enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 };
|
||||
|
||||
enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3)
|
||||
+ unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) };
|
||||
|
||||
// Reverse
|
||||
enum { R0_rev = 0 == InputRank ? 0u : (
|
||||
1 == InputRank ? unsigned(R0) : (
|
||||
2 == InputRank ? unsigned(R1) : (
|
||||
3 == InputRank ? unsigned(R2) : (
|
||||
4 == InputRank ? unsigned(R3) : (
|
||||
5 == InputRank ? unsigned(R4) : (
|
||||
6 == InputRank ? unsigned(R5) : (
|
||||
7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) };
|
||||
|
||||
typedef typename SrcViewType::array_layout SrcViewLayout ;
|
||||
|
||||
// Choose array layout, attempting to preserve original layout if at all possible.
|
||||
typedef typename Impl::if_c<
|
||||
( // Same Layout IF
|
||||
// OutputRank 0
|
||||
( OutputRank == 0 )
|
||||
||
|
||||
// OutputRank 1 or 2, InputLayout Left, Interval 0
|
||||
// because single stride one or second index has a stride.
|
||||
( OutputRank <= 2 && R0 && Impl::is_same<SrcViewLayout,LayoutLeft>::value )
|
||||
||
|
||||
// OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1]
|
||||
// because single stride one or second index has a stride.
|
||||
( OutputRank <= 2 && R0_rev && Impl::is_same<SrcViewLayout,LayoutRight>::value )
|
||||
), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ;
|
||||
|
||||
// Choose data type as a purely dynamic rank array to accomodate a runtime range.
|
||||
typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type ,
|
||||
typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *,
|
||||
typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **,
|
||||
typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***,
|
||||
typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****,
|
||||
typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****,
|
||||
typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******,
|
||||
typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******,
|
||||
typename SrcViewType::value_type ********
|
||||
>::type >::type >::type >::type >::type >::type >::type >::type OutputData ;
|
||||
|
||||
// Choose space.
|
||||
// If the source view's template arg1 or arg2 is a space then use it,
|
||||
// otherwise use the source view's execution space.
|
||||
|
||||
typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type ,
|
||||
typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space
|
||||
>::type >::type OutputSpace ;
|
||||
|
||||
public:
|
||||
|
||||
// If keeping the layout then match non-data type arguments
|
||||
// else keep execution space and memory traits.
|
||||
typedef typename
|
||||
Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value
|
||||
, Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type >
|
||||
, Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace
|
||||
, typename SrcViewType::memory_traits >
|
||||
>::type type ;
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , void , void , void
|
||||
, void , void , void , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , void , void , void
|
||||
, void , void , void , void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0);
|
||||
sub_view.h_view = subview(src.h_view,arg0);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , void , void
|
||||
, void , void , void , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , void , void
|
||||
, void , void , void , void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 , class ArgType2 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , void
|
||||
, void , void , void , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 ,
|
||||
const ArgType2 & arg2 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , void
|
||||
, void , void , void , void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, void , void , void , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 ,
|
||||
const ArgType2 & arg2 ,
|
||||
const ArgType3 & arg3 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, void , void , void , void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
||||
class ArgType4 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , void , void , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 ,
|
||||
const ArgType2 & arg2 ,
|
||||
const ArgType3 & arg3 ,
|
||||
const ArgType4 & arg4 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , void , void ,void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
||||
class ArgType4 , class ArgType5 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , ArgType5 , void , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 ,
|
||||
const ArgType2 & arg2 ,
|
||||
const ArgType3 & arg3 ,
|
||||
const ArgType4 & arg4 ,
|
||||
const ArgType5 & arg5 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , ArgType5 , void , void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
||||
class ArgType4 , class ArgType5 , class ArgType6 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , ArgType5 , ArgType6 , void
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 ,
|
||||
const ArgType2 & arg2 ,
|
||||
const ArgType3 & arg3 ,
|
||||
const ArgType4 & arg4 ,
|
||||
const ArgType5 & arg5 ,
|
||||
const ArgType6 & arg6 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , ArgType5 , ArgType6 , void
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
template< class D , class A1 , class A2 , class A3 ,
|
||||
class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 ,
|
||||
class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 >
|
||||
typename Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , ArgType5 , ArgType6 , ArgType7
|
||||
>::type
|
||||
subview( const DualView<D,A1,A2,A3> & src ,
|
||||
const ArgType0 & arg0 ,
|
||||
const ArgType1 & arg1 ,
|
||||
const ArgType2 & arg2 ,
|
||||
const ArgType3 & arg3 ,
|
||||
const ArgType4 & arg4 ,
|
||||
const ArgType5 & arg5 ,
|
||||
const ArgType6 & arg6 ,
|
||||
const ArgType7 & arg7 )
|
||||
{
|
||||
typedef typename
|
||||
Impl::ViewSubview< DualView<D,A1,A2,A3>
|
||||
, ArgType0 , ArgType1 , ArgType2 , ArgType3
|
||||
, ArgType4 , ArgType5 , ArgType6 , ArgType7
|
||||
>::type
|
||||
DstViewType ;
|
||||
DstViewType sub_view;
|
||||
sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
|
||||
sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7);
|
||||
sub_view.modified_device = src.modified_device;
|
||||
sub_view.modified_host = src.modified_host;
|
||||
return sub_view;
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* KOKKOS_USING_EXP_VIEW */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -223,14 +223,85 @@ struct DynRankDimTraits {
|
||||
);
|
||||
}
|
||||
|
||||
template < typename DynRankViewType , typename iType >
|
||||
void verify_dynrankview_rank ( iType N , const DynRankViewType &drv )
|
||||
{
|
||||
if ( static_cast<iType>(drv.rank()) > N )
|
||||
{
|
||||
Kokkos::abort( "Need at least rank arguments to the operator()" );
|
||||
}
|
||||
|
||||
/** \brief Debug bounds-checking routines */
|
||||
// Enhanced debug checking - most infrastructure matches that of functions in
|
||||
// Kokkos_ViewMapping; in addition, any extra arguments beyond the rank are checked to be 0
|
||||
template< unsigned , typename iType0 , class MapType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool dyn_rank_view_verify_operator_bounds( const iType0 & , const MapType & )
|
||||
{ return true ; }
|
||||
|
||||
template< unsigned R , typename iType0 , class MapType , typename iType1 , class ... Args >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool dyn_rank_view_verify_operator_bounds
|
||||
( const iType0 & rank
|
||||
, const MapType & map
|
||||
, const iType1 & i
|
||||
, Args ... args
|
||||
)
|
||||
{
|
||||
if ( static_cast<iType0>(R) < rank ) {
|
||||
return ( size_t(i) < map.extent(R) )
|
||||
&& dyn_rank_view_verify_operator_bounds<R+1>( rank , map , args ... );
|
||||
}
|
||||
else if ( i != 0 ) {
|
||||
printf("DynRankView Debug Bounds Checking Error: at rank %u\n Extra arguments beyond the rank must be zero \n",R);
|
||||
return ( false )
|
||||
&& dyn_rank_view_verify_operator_bounds<R+1>( rank , map , args ... );
|
||||
}
|
||||
else {
|
||||
return ( true )
|
||||
&& dyn_rank_view_verify_operator_bounds<R+1>( rank , map , args ... );
|
||||
}
|
||||
}
|
||||
|
||||
// End of recursion: no indices left to format.
template< unsigned , class MapType >
inline
void dyn_rank_view_error_operator_bounds( char * , int , const MapType & )
{}

// Appends " <index> < <extent> ," (or ")" after the last index) for the R-th
// index to `buf`, then recurses on the remaining indices.
//
//   buf  : destination for the formatted text
//   len  : number of bytes available at `buf`
//   map  : object providing extent(R)
//   i    : R-th index value
//   args : remaining index values
//
// Formatting stops as soon as the buffer is exhausted; the text written so
// far stays NUL-terminated.
template< unsigned R , class MapType , class iType , class ... Args >
inline
void dyn_rank_view_error_operator_bounds
  ( char * buf
  , int len
  , const MapType & map
  , const iType & i
  , Args ... args
  )
{
  // Nothing can be written into an empty (or already overrun) buffer.
  if ( len <= 0 ) return ;

  const int n =
    snprintf(buf,len," %ld < %ld %c"
            , static_cast<unsigned long>(i)
            , static_cast<unsigned long>( map.extent(R) )
            , ( sizeof...(Args) ? ',' : ')' )
            );

  // snprintf returns the length it *would* have written. On truncation
  // (n >= len) or error (n < 0), advancing by n would move past the buffer
  // and make the remaining length negative, so stop here instead.
  if ( n < 0 || n >= len ) return ;

  dyn_rank_view_error_operator_bounds<R+1>(buf+n,len-n,map,args...);
}
|
||||
|
||||
// op_rank = rank of the operator version that was called
|
||||
template< typename iType0 , typename iType1 , class MapType , class ... Args >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void dyn_rank_view_verify_operator_bounds
|
||||
( const iType0 & op_rank , const iType1 & rank , const char* label , const MapType & map , Args ... args )
|
||||
{
|
||||
if ( static_cast<iType0>(rank) > op_rank ) {
|
||||
Kokkos::abort( "DynRankView Bounds Checking Error: Need at least rank arguments to the operator()" );
|
||||
}
|
||||
|
||||
if ( ! dyn_rank_view_verify_operator_bounds<0>( rank , map , args ... ) ) {
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
enum { LEN = 1024 };
|
||||
char buffer[ LEN ];
|
||||
int n = snprintf(buffer,LEN,"DynRankView bounds error of view %s (", label);
|
||||
dyn_rank_view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... );
|
||||
Kokkos::Impl::throw_runtime_exception(std::string(buffer));
|
||||
#else
|
||||
Kokkos::abort("DynRankView bounds error");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** \brief Assign compatible default mappings */
|
||||
@ -341,7 +412,6 @@ class DynRankView : public ViewTraits< DataType , Properties ... >
|
||||
|
||||
private:
|
||||
template < class , class ... > friend class DynRankView ;
|
||||
// template < class , class ... > friend class Kokkos::Experimental::View ; //unnecessary now...
|
||||
template < class , class ... > friend class Impl::ViewMapping ;
|
||||
|
||||
public:
|
||||
@ -504,20 +574,26 @@ private:
|
||||
( is_layout_left || is_layout_right || is_layout_stride )
|
||||
};
|
||||
|
||||
template< class Space , bool = Kokkos::Impl::MemorySpaceAccess< Space , typename traits::memory_space >::accessible > struct verify_space
|
||||
{ KOKKOS_FORCEINLINE_FUNCTION static void check() {} };
|
||||
|
||||
template< class Space > struct verify_space<Space,false>
|
||||
{ KOKKOS_FORCEINLINE_FUNCTION static void check()
|
||||
{ Kokkos::abort("Kokkos::DynRankView ERROR: attempt to access inaccessible memory space"); };
|
||||
};
|
||||
|
||||
// Bounds checking macros
|
||||
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
|
||||
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( N , ARG ) \
|
||||
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \
|
||||
< Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); \
|
||||
Kokkos::Experimental::Impl::verify_dynrankview_rank ( N , *this ) ; \
|
||||
Kokkos::Experimental::Impl::view_verify_operator_bounds ARG ;
|
||||
// rank of the calling operator - included as first argument in ARG
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
|
||||
Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds ARG ;
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( N , ARG ) \
|
||||
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \
|
||||
< Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify();
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
|
||||
|
||||
#endif
|
||||
|
||||
@ -532,7 +608,11 @@ public:
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
reference_type operator()() const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 0 , ( implementation_map() ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) )
|
||||
#endif
|
||||
return implementation_map().reference();
|
||||
//return m_map.reference(0,0,0,0,0,0,0);
|
||||
}
|
||||
@ -563,12 +643,17 @@ public:
|
||||
return rankone_view(i0);
|
||||
}
|
||||
|
||||
// Rank 1 parenthesis
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type
|
||||
operator()(const iType & i0 ) const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 1 , ( m_map , i0 ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
return m_map.reference(i0);
|
||||
}
|
||||
|
||||
@ -577,6 +662,11 @@ public:
|
||||
typename std::enable_if< !(std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type
|
||||
operator()(const iType & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
return m_map.reference(i0,0,0,0,0,0,0);
|
||||
}
|
||||
|
||||
@ -586,7 +676,11 @@ public:
|
||||
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value), reference_type>::type
|
||||
operator()(const iType0 & i0 , const iType1 & i1 ) const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 2 , ( m_map , i0 , i1 ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1);
|
||||
}
|
||||
|
||||
@ -595,7 +689,11 @@ public:
|
||||
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
|
||||
operator()(const iType0 & i0 , const iType1 & i1 ) const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 2 , ( m_map , i0 , i1 ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,0,0,0,0,0);
|
||||
}
|
||||
|
||||
@ -605,7 +703,11 @@ public:
|
||||
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value), reference_type>::type
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 3 , ( m_map , i0 , i1 , i2 ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2);
|
||||
}
|
||||
|
||||
@ -614,7 +716,11 @@ public:
|
||||
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 3 , ( m_map , i0 , i1 , i2 ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,0,0,0,0);
|
||||
}
|
||||
|
||||
@ -624,7 +730,11 @@ public:
|
||||
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value), reference_type>::type
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 4 , ( m_map , i0 , i1 , i2 , i3 ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3);
|
||||
}
|
||||
|
||||
@ -633,7 +743,11 @@ public:
|
||||
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 4 , ( m_map , i0 , i1 , i2 , i3 ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,0,0,0);
|
||||
}
|
||||
|
||||
@ -643,7 +757,11 @@ public:
|
||||
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value), reference_type>::type
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 5 , ( m_map , i0 , i1 , i2 , i3 , i4 ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4);
|
||||
}
|
||||
|
||||
@ -652,7 +770,11 @@ public:
|
||||
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 5 , ( m_map , i0 , i1 , i2 , i3 , i4 ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4,0,0);
|
||||
}
|
||||
|
||||
@ -662,7 +784,11 @@ public:
|
||||
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value), reference_type>::type
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 6 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5);
|
||||
}
|
||||
|
||||
@ -671,7 +797,11 @@ public:
|
||||
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 6 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,0);
|
||||
}
|
||||
|
||||
@ -681,7 +811,11 @@ public:
|
||||
typename std::enable_if< (std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value && std::is_integral<iType6>::value), reference_type>::type
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
|
||||
{
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( 7 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 , i6 ) )
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
|
||||
}
|
||||
|
||||
@ -1136,13 +1270,13 @@ private:
|
||||
|
||||
public:
|
||||
|
||||
typedef Kokkos::Experimental::ViewTraits
|
||||
typedef Kokkos::ViewTraits
|
||||
< data_type
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, typename SrcTraits::memory_traits > traits_type ;
|
||||
|
||||
typedef Kokkos::Experimental::View
|
||||
typedef Kokkos::View
|
||||
< data_type
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
@ -1154,13 +1288,13 @@ public:
|
||||
|
||||
static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" );
|
||||
|
||||
typedef Kokkos::Experimental::ViewTraits
|
||||
typedef Kokkos::ViewTraits
|
||||
< data_type
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
, MemoryTraits > traits_type ;
|
||||
|
||||
typedef Kokkos::Experimental::View
|
||||
typedef Kokkos::View
|
||||
< data_type
|
||||
, array_layout
|
||||
, typename SrcTraits::device_type
|
||||
@ -1264,7 +1398,7 @@ subdynrankview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args.
|
||||
if ( src.rank() > sizeof...(Args) ) //allow sizeof...(Args) >= src.rank(), ignore the remaining args
|
||||
{ Kokkos::abort("subdynrankview: num of args must be >= rank of the source DynRankView"); }
|
||||
|
||||
typedef Kokkos::Experimental::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , Kokkos::Experimental::ViewTraits< D*******, P... > , Args... > metafcn ;
|
||||
typedef Kokkos::Experimental::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , Kokkos::ViewTraits< D*******, P... > , Args... > metafcn ;
|
||||
|
||||
return metafcn::subview( src.rank() , src , args... );
|
||||
}
|
||||
@ -1502,10 +1636,10 @@ void deep_copy
|
||||
typedef typename src_type::memory_space src_memory_space ;
|
||||
|
||||
enum { DstExecCanAccessSrc =
|
||||
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value };
|
||||
Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible };
|
||||
|
||||
enum { SrcExecCanAccessDst =
|
||||
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename src_execution_space::memory_space , dst_memory_space >::value };
|
||||
Kokkos::Impl::SpaceAccessibility< src_execution_space , dst_memory_space >::accessible };
|
||||
|
||||
if ( (void *) dst.data() != (void*) src.data() ) {
|
||||
|
||||
@ -1666,7 +1800,7 @@ inline
|
||||
typename DynRankView<T,P...>::HostMirror
|
||||
create_mirror( const DynRankView<T,P...> & src
|
||||
, typename std::enable_if<
|
||||
! std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout
|
||||
! std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
|
||||
, Kokkos::LayoutStride >::value
|
||||
>::type * = 0
|
||||
)
|
||||
@ -1684,7 +1818,7 @@ inline
|
||||
typename DynRankView<T,P...>::HostMirror
|
||||
create_mirror( const DynRankView<T,P...> & src
|
||||
, typename std::enable_if<
|
||||
std::is_same< typename Kokkos::Experimental::ViewTraits<T,P...>::array_layout
|
||||
std::is_same< typename Kokkos::ViewTraits<T,P...>::array_layout
|
||||
, Kokkos::LayoutStride >::value
|
||||
>::type * = 0
|
||||
)
|
||||
@ -1779,7 +1913,7 @@ void resize( DynRankView<T,P...> & v ,
|
||||
{
|
||||
typedef DynRankView<T,P...> drview_type ;
|
||||
|
||||
static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only resize managed views" );
|
||||
static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only resize managed views" );
|
||||
|
||||
drview_type v_resized( v.label(), n0, n1, n2, n3, n4, n5, n6 );
|
||||
|
||||
@ -1803,7 +1937,7 @@ void realloc( DynRankView<T,P...> & v ,
|
||||
{
|
||||
typedef DynRankView<T,P...> drview_type ;
|
||||
|
||||
static_assert( Kokkos::Experimental::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" );
|
||||
static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only realloc managed views" );
|
||||
|
||||
const std::string label = v.label();
|
||||
|
||||
|
||||
@ -56,7 +56,7 @@ namespace Experimental {
|
||||
* Subviews are not allowed.
|
||||
*/
|
||||
template< typename DataType , typename ... P >
|
||||
class DynamicView : public Kokkos::Experimental::ViewTraits< DataType , P ... >
|
||||
class DynamicView : public Kokkos::ViewTraits< DataType , P ... >
|
||||
{
|
||||
public:
|
||||
|
||||
@ -75,6 +75,15 @@ private:
|
||||
std::is_same< typename traits::specialize , void >::value
|
||||
, "DynamicView must have trivial data type" );
|
||||
|
||||
|
||||
template< class Space , bool = Kokkos::Impl::MemorySpaceAccess< Space , typename traits::memory_space >::accessible > struct verify_space
|
||||
{ KOKKOS_FORCEINLINE_FUNCTION static void check() {} };
|
||||
|
||||
template< class Space > struct verify_space<Space,false>
|
||||
{ KOKKOS_FORCEINLINE_FUNCTION static void check()
|
||||
{ Kokkos::abort("Kokkos::DynamicView ERROR: attempt to access inaccessible memory space"); };
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
typedef Kokkos::Experimental::MemoryPool< typename traits::device_type > memory_pool ;
|
||||
@ -117,10 +126,10 @@ public:
|
||||
KOKKOS_INLINE_FUNCTION constexpr size_t size() const
|
||||
{
|
||||
return
|
||||
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
|
||||
Kokkos::Impl::MemorySpaceAccess
|
||||
< Kokkos::Impl::ActiveExecutionMemorySpace
|
||||
, typename traits::memory_space
|
||||
>::value
|
||||
>::accessible
|
||||
? // Runtime size is at the end of the chunk pointer array
|
||||
(*reinterpret_cast<const uintptr_t*>( m_chunks + m_chunk_max ))
|
||||
<< m_chunk_shift
|
||||
@ -179,10 +188,7 @@ public:
|
||||
static_assert( Kokkos::Impl::are_integral<I0,Args...>::value
|
||||
, "Indices must be integral type" );
|
||||
|
||||
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
|
||||
< Kokkos::Impl::ActiveExecutionMemorySpace
|
||||
, typename traits::memory_space
|
||||
>::verify();
|
||||
DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
|
||||
|
||||
// Which chunk is being indexed.
|
||||
const uintptr_t ic = uintptr_t( i0 >> m_chunk_shift );
|
||||
@ -223,15 +229,13 @@ public:
|
||||
{
|
||||
typedef typename traits::value_type value_type ;
|
||||
|
||||
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
|
||||
< Kokkos::Impl::ActiveExecutionMemorySpace
|
||||
, typename traits::memory_space >::verify();
|
||||
DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
|
||||
|
||||
const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ;
|
||||
|
||||
if ( m_chunk_max < NC ) {
|
||||
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
|
||||
printf("DynamicView::resize_parallel(%lu) m_chunk_max(%lu) NC(%lu)\n"
|
||||
printf("DynamicView::resize_parallel(%lu) m_chunk_max(%u) NC(%lu)\n"
|
||||
, n , m_chunk_max , NC );
|
||||
#endif
|
||||
Kokkos::abort("DynamicView::resize_parallel exceeded maximum size");
|
||||
@ -269,9 +273,7 @@ public:
|
||||
inline
|
||||
void resize_serial( size_t n )
|
||||
{
|
||||
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
|
||||
< Kokkos::Impl::ActiveExecutionMemorySpace
|
||||
, typename traits::memory_space >::verify();
|
||||
DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
|
||||
|
||||
const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ;
|
||||
|
||||
@ -398,9 +400,7 @@ public:
|
||||
, m_chunk_mask( ( 1 << m_chunk_shift ) - 1 )
|
||||
, m_chunk_max( ( arg_size_max + m_chunk_mask ) >> m_chunk_shift )
|
||||
{
|
||||
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace
|
||||
< Kokkos::Impl::ActiveExecutionMemorySpace
|
||||
, typename traits::memory_space >::verify();
|
||||
DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
|
||||
|
||||
// A functor to deallocate all of the chunks upon final destruction
|
||||
|
||||
@ -452,7 +452,7 @@ void deep_copy( const View<T,DP...> & dst
|
||||
typedef typename ViewTraits<T,SP...>::memory_space src_memory_space ;
|
||||
|
||||
enum { DstExecCanAccessSrc =
|
||||
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value };
|
||||
Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible };
|
||||
|
||||
if ( DstExecCanAccessSrc ) {
|
||||
// Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
|
||||
@ -476,7 +476,7 @@ void deep_copy( const DynamicView<T,DP...> & dst
|
||||
typedef typename ViewTraits<T,SP...>::memory_space src_memory_space ;
|
||||
|
||||
enum { DstExecCanAccessSrc =
|
||||
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename dst_execution_space::memory_space , src_memory_space >::value };
|
||||
Kokkos::Impl::SpaceAccessibility< dst_execution_space , src_memory_space >::accessible };
|
||||
|
||||
if ( DstExecCanAccessSrc ) {
|
||||
// Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
|
||||
|
||||
196
lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp
Normal file
196
lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp
Normal file
@ -0,0 +1,196 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXPERIMENTAL_ERROR_REPORTER_HPP
|
||||
#define KOKKOS_EXPERIMENTAL_ERROR_REPORTER_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_View.hpp>
|
||||
#include <Kokkos_DualView.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
template <typename ReportType, typename DeviceType>
|
||||
class ErrorReporter
|
||||
{
|
||||
public:
|
||||
|
||||
typedef ReportType report_type;
|
||||
typedef DeviceType device_type;
|
||||
typedef typename device_type::execution_space execution_space;
|
||||
|
||||
ErrorReporter(int max_results)
|
||||
: m_numReportsAttempted(""),
|
||||
m_reports("", max_results),
|
||||
m_reporters("", max_results)
|
||||
{
|
||||
clear();
|
||||
}
|
||||
|
||||
int getCapacity() const { return m_reports.h_view.dimension_0(); }
|
||||
|
||||
int getNumReports();
|
||||
|
||||
int getNumReportAttempts();
|
||||
|
||||
void getReports(std::vector<int> &reporters_out, std::vector<report_type> &reports_out);
|
||||
void getReports( typename Kokkos::View<int*, typename DeviceType::execution_space >::HostMirror &reporters_out,
|
||||
typename Kokkos::View<report_type*, typename DeviceType::execution_space >::HostMirror &reports_out);
|
||||
|
||||
void clear();
|
||||
|
||||
void resize(const size_t new_size);
|
||||
|
||||
bool full() {return (getNumReportAttempts() >= getCapacity()); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool add_report(int reporter_id, report_type report) const
|
||||
{
|
||||
int idx = Kokkos::atomic_fetch_add(&m_numReportsAttempted(), 1);
|
||||
|
||||
if (idx >= 0 && (idx < static_cast<int>(m_reports.d_view.dimension_0()))) {
|
||||
m_reporters.d_view(idx) = reporter_id;
|
||||
m_reports.d_view(idx) = report;
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
typedef Kokkos::View<report_type *, execution_space> reports_view_t;
|
||||
typedef Kokkos::DualView<report_type *, execution_space> reports_dualview_t;
|
||||
|
||||
typedef typename reports_dualview_t::host_mirror_space host_mirror_space;
|
||||
Kokkos::View<int, execution_space> m_numReportsAttempted;
|
||||
reports_dualview_t m_reports;
|
||||
Kokkos::DualView<int *, execution_space> m_reporters;
|
||||
|
||||
};
|
||||
|
||||
|
||||
template <typename ReportType, typename DeviceType>
|
||||
inline int ErrorReporter<ReportType, DeviceType>::getNumReports()
|
||||
{
|
||||
int num_reports = 0;
|
||||
Kokkos::deep_copy(num_reports,m_numReportsAttempted);
|
||||
if (num_reports > static_cast<int>(m_reports.h_view.dimension_0())) {
|
||||
num_reports = m_reports.h_view.dimension_0();
|
||||
}
|
||||
return num_reports;
|
||||
}
|
||||
|
||||
template <typename ReportType, typename DeviceType>
|
||||
inline int ErrorReporter<ReportType, DeviceType>::getNumReportAttempts()
|
||||
{
|
||||
int num_reports = 0;
|
||||
Kokkos::deep_copy(num_reports,m_numReportsAttempted);
|
||||
return num_reports;
|
||||
}
|
||||
|
||||
template <typename ReportType, typename DeviceType>
|
||||
void ErrorReporter<ReportType, DeviceType>::getReports(std::vector<int> &reporters_out, std::vector<report_type> &reports_out)
|
||||
{
|
||||
int num_reports = getNumReports();
|
||||
reporters_out.clear();
|
||||
reporters_out.reserve(num_reports);
|
||||
reports_out.clear();
|
||||
reports_out.reserve(num_reports);
|
||||
|
||||
if (num_reports > 0) {
|
||||
m_reports.template sync<host_mirror_space>();
|
||||
m_reporters.template sync<host_mirror_space>();
|
||||
|
||||
for (int i = 0; i < num_reports; ++i) {
|
||||
reporters_out.push_back(m_reporters.h_view(i));
|
||||
reports_out.push_back(m_reports.h_view(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ReportType, typename DeviceType>
|
||||
void ErrorReporter<ReportType, DeviceType>::getReports(
|
||||
typename Kokkos::View<int*, typename DeviceType::execution_space >::HostMirror &reporters_out,
|
||||
typename Kokkos::View<report_type*, typename DeviceType::execution_space >::HostMirror &reports_out)
|
||||
{
|
||||
int num_reports = getNumReports();
|
||||
reporters_out = typename Kokkos::View<int*, typename DeviceType::execution_space >::HostMirror("ErrorReport::reporters_out",num_reports);
|
||||
reports_out = typename Kokkos::View<report_type*, typename DeviceType::execution_space >::HostMirror("ErrorReport::reports_out",num_reports);
|
||||
|
||||
if (num_reports > 0) {
|
||||
m_reports.template sync<host_mirror_space>();
|
||||
m_reporters.template sync<host_mirror_space>();
|
||||
|
||||
for (int i = 0; i < num_reports; ++i) {
|
||||
reporters_out(i) = m_reporters.h_view(i);
|
||||
reports_out(i) = m_reports.h_view(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ReportType, typename DeviceType>
|
||||
void ErrorReporter<ReportType, DeviceType>::clear()
|
||||
{
|
||||
int num_reports=0;
|
||||
Kokkos::deep_copy(m_numReportsAttempted, num_reports);
|
||||
m_reports.template modify<execution_space>();
|
||||
m_reporters.template modify<execution_space>();
|
||||
}
|
||||
|
||||
template <typename ReportType, typename DeviceType>
|
||||
void ErrorReporter<ReportType, DeviceType>::resize(const size_t new_size)
|
||||
{
|
||||
m_reports.resize(new_size);
|
||||
m_reporters.resize(new_size);
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif
|
||||
@ -1,531 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_SEGMENTED_VIEW_HPP_
|
||||
#define KOKKOS_SEGMENTED_VIEW_HPP_
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
#if ! KOKKOS_USING_EXP_VIEW
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
namespace Impl {
|
||||
|
||||
template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
|
||||
struct delete_segmented_view;
|
||||
|
||||
/// Generic case: nothing to configure for this memory space, so the request
/// is ignored.
template<class MemorySpace>
inline
void DeviceSetAllocatableMemorySize(size_t) {}

#if defined( KOKKOS_HAVE_CUDA )

/// Shared implementation for the CUDA memory spaces.
///
/// Queries cudaLimitMallocHeapSize and, if it is smaller than \c size, raises
/// it to twice \c size. Compiles to a no-op when not built by a CUDA compiler.
///
/// \param size  Number of bytes that must be allocatable from the heap.
inline
void cuda_raise_malloc_heap_limit(size_t size) {
#ifdef __CUDACC__
  // Start from 0 so a failed query leads to an attempt to raise the limit
  // instead of a read of an uninitialized value.
  size_t size_limit = 0;
  cudaDeviceGetLimit(&size_limit,cudaLimitMallocHeapSize);
  if(size_limit<size)
    cudaDeviceSetLimit(cudaLimitMallocHeapSize,2*size);
#else
  (void) size;
#endif
}

/// CudaSpace: make sure the malloc heap limit covers \c size bytes.
template<>
inline
void DeviceSetAllocatableMemorySize<Kokkos::CudaSpace>(size_t size) {
  cuda_raise_malloc_heap_limit(size);
}

/// CudaUVMSpace: make sure the malloc heap limit covers \c size bytes.
template<>
inline
void DeviceSetAllocatableMemorySize<Kokkos::CudaUVMSpace>(size_t size) {
  cuda_raise_malloc_heap_limit(size);
}

#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
|
||||
}
|
||||
|
||||
template< class DataType ,
|
||||
class Arg1Type = void ,
|
||||
class Arg2Type = void ,
|
||||
class Arg3Type = void>
|
||||
class SegmentedView : public Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type >
|
||||
{
|
||||
public:
|
||||
//! \name Typedefs for device types and various Kokkos::View specializations.
|
||||
//@{
|
||||
typedef Kokkos::ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ;
|
||||
|
||||
//! The type of a Kokkos::View on the device.
|
||||
typedef Kokkos::View< typename traits::data_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::memory_space ,
|
||||
Kokkos::MemoryUnmanaged > t_dev ;
|
||||
|
||||
|
||||
private:
|
||||
Kokkos::View<t_dev*,typename traits::memory_space> segments_;
|
||||
|
||||
Kokkos::View<int,typename traits::memory_space> realloc_lock;
|
||||
Kokkos::View<int,typename traits::memory_space> nsegments_;
|
||||
|
||||
size_t segment_length_;
|
||||
size_t segment_length_m1_;
|
||||
int max_segments_;
|
||||
|
||||
int segment_length_log2;
|
||||
|
||||
// Dimensions, cardinality, capacity, and offset computation for
|
||||
// multidimensional array view of contiguous memory.
|
||||
// Inherits from Impl::Shape
|
||||
typedef Kokkos::Impl::ViewOffset< typename traits::shape_type
|
||||
, typename traits::array_layout
|
||||
> offset_map_type ;
|
||||
|
||||
offset_map_type m_offset_map ;
|
||||
|
||||
typedef Kokkos::View< typename traits::array_intrinsic_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::memory_space ,
|
||||
typename traits::memory_traits > array_type ;
|
||||
|
||||
typedef Kokkos::View< typename traits::const_data_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::memory_space ,
|
||||
typename traits::memory_traits > const_type ;
|
||||
|
||||
typedef Kokkos::View< typename traits::non_const_data_type ,
|
||||
typename traits::array_layout ,
|
||||
typename traits::memory_space ,
|
||||
typename traits::memory_traits > non_const_type ;
|
||||
|
||||
typedef Kokkos::View< typename traits::non_const_data_type ,
|
||||
typename traits::array_layout ,
|
||||
HostSpace ,
|
||||
void > HostMirror ;
|
||||
|
||||
template< bool Accessible >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename Kokkos::Impl::enable_if< Accessible , typename traits::size_type >::type
|
||||
dimension_0_intern() const { return nsegments_() * segment_length_ ; }
|
||||
|
||||
template< bool Accessible >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename Kokkos::Impl::enable_if< ! Accessible , typename traits::size_type >::type
|
||||
dimension_0_intern() const
|
||||
{
|
||||
// In Host space
|
||||
int n = 0 ;
|
||||
#if ! defined( __CUDA_ARCH__ )
|
||||
Kokkos::Impl::DeepCopy< HostSpace , typename traits::memory_space >( & n , nsegments_.ptr_on_device() , sizeof(int) );
|
||||
#endif
|
||||
|
||||
return n * segment_length_ ;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
enum { Rank = traits::rank };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION offset_map_type shape() const { return m_offset_map ; }
|
||||
|
||||
/* \brief return (current) size of dimension 0 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_0() const {
|
||||
enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
|
||||
Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
|
||||
int n = SegmentedView::dimension_0_intern< Accessible >();
|
||||
return n ;
|
||||
}
|
||||
|
||||
/* \brief return size of dimension 1 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_1() const { return m_offset_map.N1 ; }
|
||||
/* \brief return size of dimension 2 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_2() const { return m_offset_map.N2 ; }
|
||||
/* \brief return size of dimension 3 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_3() const { return m_offset_map.N3 ; }
|
||||
/* \brief return size of dimension 4 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_4() const { return m_offset_map.N4 ; }
|
||||
/* \brief return size of dimension 5 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_5() const { return m_offset_map.N5 ; }
|
||||
/* \brief return size of dimension 6 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_6() const { return m_offset_map.N6 ; }
|
||||
/* \brief return size of dimension 7 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type dimension_7() const { return m_offset_map.N7 ; }
|
||||
|
||||
/* \brief return size of dimension 2 */
|
||||
KOKKOS_INLINE_FUNCTION typename traits::size_type size() const {
|
||||
return dimension_0() *
|
||||
m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
|
||||
m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7 ;
|
||||
}
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename traits::size_type dimension( const iType & i ) const {
|
||||
if(i==0)
|
||||
return dimension_0();
|
||||
else
|
||||
return Kokkos::Impl::dimension( m_offset_map , i );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename traits::size_type capacity() {
|
||||
return segments_.dimension_0() *
|
||||
m_offset_map.N1 * m_offset_map.N2 * m_offset_map.N3 * m_offset_map.N4 *
|
||||
m_offset_map.N5 * m_offset_map.N6 * m_offset_map.N7;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename traits::size_type get_num_segments() {
|
||||
enum { Accessible = Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
|
||||
Kokkos::Impl::ActiveExecutionMemorySpace, typename traits::memory_space >::value };
|
||||
int n = SegmentedView::dimension_0_intern< Accessible >();
|
||||
return n/segment_length_ ;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename traits::size_type get_max_segments() {
|
||||
return max_segments_;
|
||||
}
|
||||
|
||||
/// \brief Constructor that allocates View objects with an initial length of 0.
|
||||
///
|
||||
/// This constructor works mostly like the analogous constructor of View.
|
||||
/// The first argument is a string label, which is entirely for your
|
||||
/// benefit. (Different SegmentedView objects may have the same label if
|
||||
/// you like.) The second argument 'view_length' is the size of the segments.
|
||||
/// This number must be a power of two. The third argument n0 is the maximum
|
||||
/// value for the first dimension of the segmented view. The maximal allocatable
|
||||
/// number of Segments is thus: (n0+view_length-1)/view_length.
|
||||
/// The arguments that follow are the other dimensions of the (1-7) of the
|
||||
/// View objects. For example, for a View with 3 runtime dimensions,
|
||||
/// the first 4 integer arguments will be nonzero:
|
||||
/// SegmentedView("Name",32768,10000000,8,4). This allocates a SegmentedView
|
||||
/// with a maximum of 306 segments of dimension (32768,8,4). The logical size of
|
||||
/// the segmented view is (n,8,4) with n between 0 and 10000000.
|
||||
/// You may omit the integer arguments that follow.
|
||||
template< class LabelType >
|
||||
SegmentedView(const LabelType & label ,
|
||||
const size_t view_length ,
|
||||
const size_t n0 ,
|
||||
const size_t n1 = 0 ,
|
||||
const size_t n2 = 0 ,
|
||||
const size_t n3 = 0 ,
|
||||
const size_t n4 = 0 ,
|
||||
const size_t n5 = 0 ,
|
||||
const size_t n6 = 0 ,
|
||||
const size_t n7 = 0
|
||||
): segment_length_(view_length),segment_length_m1_(view_length-1)
|
||||
{
|
||||
segment_length_log2 = -1;
|
||||
size_t l = segment_length_;
|
||||
while(l>0) {
|
||||
l>>=1;
|
||||
segment_length_log2++;
|
||||
}
|
||||
l = 1<<segment_length_log2;
|
||||
if(l!=segment_length_)
|
||||
Kokkos::Impl::throw_runtime_exception("Kokkos::SegmentedView requires a 'power of 2' segment length");
|
||||
|
||||
max_segments_ = (n0+segment_length_m1_)/segment_length_;
|
||||
|
||||
Impl::DeviceSetAllocatableMemorySize<typename traits::memory_space>(segment_length_*max_segments_*sizeof(typename traits::value_type));
|
||||
|
||||
segments_ = Kokkos::View<t_dev*,typename traits::execution_space>(label , max_segments_);
|
||||
realloc_lock = Kokkos::View<int,typename traits::execution_space>("Lock");
|
||||
nsegments_ = Kokkos::View<int,typename traits::execution_space>("nviews");
|
||||
m_offset_map.assign( n0, n1, n2, n3, n4, n5, n6, n7, n0*n1*n2*n3*n4*n5*n6*n7 );
|
||||
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SegmentedView(const SegmentedView& src):
|
||||
segments_(src.segments_),
|
||||
realloc_lock (src.realloc_lock),
|
||||
nsegments_ (src.nsegments_),
|
||||
segment_length_(src.segment_length_),
|
||||
segment_length_m1_(src.segment_length_m1_),
|
||||
max_segments_ (src.max_segments_),
|
||||
segment_length_log2(src.segment_length_log2),
|
||||
m_offset_map (src.m_offset_map)
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
SegmentedView& operator= (const SegmentedView& src) {
|
||||
segments_ = src.segments_;
|
||||
realloc_lock = src.realloc_lock;
|
||||
nsegments_ = src.nsegments_;
|
||||
segment_length_= src.segment_length_;
|
||||
segment_length_m1_= src.segment_length_m1_;
|
||||
max_segments_ = src.max_segments_;
|
||||
segment_length_log2= src.segment_length_log2;
|
||||
m_offset_map = src.m_offset_map;
|
||||
return *this;
|
||||
}
|
||||
|
||||
~SegmentedView() {
|
||||
if ( !segments_.tracker().ref_counting()) { return; }
|
||||
size_t ref_count = segments_.tracker().ref_count();
|
||||
if(ref_count == 1u) {
|
||||
Kokkos::fence();
|
||||
typename Kokkos::View<int,typename traits::execution_space>::HostMirror h_nviews("h_nviews");
|
||||
Kokkos::deep_copy(h_nviews,nsegments_);
|
||||
Kokkos::parallel_for(h_nviews(),Impl::delete_segmented_view<DataType , Arg1Type , Arg2Type, Arg3Type>(*this));
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
t_dev get_segment(const int& i) const {
|
||||
return segments_[i];
|
||||
}
|
||||
|
||||
template< class MemberType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void grow (MemberType& team_member, const size_t& growSize) const {
|
||||
if (growSize>max_segments_*segment_length_) {
|
||||
printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
|
||||
return;
|
||||
}
|
||||
|
||||
if(team_member.team_rank()==0) {
|
||||
bool too_small = growSize > segment_length_ * nsegments_();
|
||||
if (too_small) {
|
||||
while(Kokkos::atomic_compare_exchange(&realloc_lock(),0,1) )
|
||||
; // get the lock
|
||||
too_small = growSize > segment_length_ * nsegments_(); // Recheck once we have the lock
|
||||
if(too_small) {
|
||||
while(too_small) {
|
||||
const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
|
||||
m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
|
||||
typename traits::non_const_value_type* const ptr = new typename traits::non_const_value_type[alloc_size];
|
||||
|
||||
segments_(nsegments_()) =
|
||||
t_dev(ptr,segment_length_,m_offset_map.N1,m_offset_map.N2,m_offset_map.N3,m_offset_map.N4,m_offset_map.N5,m_offset_map.N6,m_offset_map.N7);
|
||||
nsegments_()++;
|
||||
too_small = growSize > segment_length_ * nsegments_();
|
||||
}
|
||||
}
|
||||
realloc_lock() = 0; //release the lock
|
||||
}
|
||||
}
|
||||
team_member.team_barrier();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void grow_non_thread_safe (const size_t& growSize) const {
|
||||
if (growSize>max_segments_*segment_length_) {
|
||||
printf ("Exceeding maxSize: %lu %lu\n", growSize, max_segments_*segment_length_);
|
||||
return;
|
||||
}
|
||||
bool too_small = growSize > segment_length_ * nsegments_();
|
||||
if(too_small) {
|
||||
while(too_small) {
|
||||
const size_t alloc_size = segment_length_*m_offset_map.N1*m_offset_map.N2*m_offset_map.N3*
|
||||
m_offset_map.N4*m_offset_map.N5*m_offset_map.N6*m_offset_map.N7;
|
||||
typename traits::non_const_value_type* const ptr =
|
||||
new typename traits::non_const_value_type[alloc_size];
|
||||
|
||||
segments_(nsegments_()) =
|
||||
t_dev (ptr, segment_length_, m_offset_map.N1, m_offset_map.N2,
|
||||
m_offset_map.N3, m_offset_map.N4, m_offset_map.N5,
|
||||
m_offset_map.N6, m_offset_map.N7);
|
||||
nsegments_()++;
|
||||
too_small = growSize > segment_length_ * nsegments_();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template< typename iType0 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value && traits::rank == 1 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_));
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
traits::rank == 2 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
std::is_integral<iType2>::value &&
|
||||
traits::rank == 3 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
std::is_integral<iType2>::value &&
|
||||
std::is_integral<iType3>::value &&
|
||||
traits::rank == 4 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
||||
typename iType4 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
std::is_integral<iType2>::value &&
|
||||
std::is_integral<iType3>::value &&
|
||||
std::is_integral<iType4>::value &&
|
||||
traits::rank == 5 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
||||
const iType4 & i4 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
||||
typename iType4 , typename iType5 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
std::is_integral<iType2>::value &&
|
||||
std::is_integral<iType3>::value &&
|
||||
std::is_integral<iType4>::value &&
|
||||
std::is_integral<iType5>::value &&
|
||||
traits::rank == 6 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
||||
const iType4 & i4 , const iType5 & i5 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
||||
typename iType4 , typename iType5 , typename iType6 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
std::is_integral<iType2>::value &&
|
||||
std::is_integral<iType3>::value &&
|
||||
std::is_integral<iType4>::value &&
|
||||
std::is_integral<iType5>::value &&
|
||||
std::is_integral<iType6>::value &&
|
||||
traits::rank == 7 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
||||
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 ,
|
||||
typename iType4 , typename iType5 , typename iType6 , typename iType7 >
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename std::enable_if<( std::is_integral<iType0>::value &&
|
||||
std::is_integral<iType1>::value &&
|
||||
std::is_integral<iType2>::value &&
|
||||
std::is_integral<iType3>::value &&
|
||||
std::is_integral<iType4>::value &&
|
||||
std::is_integral<iType5>::value &&
|
||||
std::is_integral<iType6>::value &&
|
||||
std::is_integral<iType7>::value &&
|
||||
traits::rank == 8 )
|
||||
, typename traits::value_type &
|
||||
>::type
|
||||
operator() ( const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ,
|
||||
const iType4 & i4 , const iType5 & i5 , const iType6 & i6 , const iType7 & i7 ) const
|
||||
{
|
||||
return segments_[i0>>segment_length_log2](i0&(segment_length_m1_),i1,i2,i3,i4,i5,i6,i7);
|
||||
}
|
||||
};
|
||||
|
||||
namespace Impl {
|
||||
template<class DataType, class Arg1Type, class Arg2Type, class Arg3Type>
|
||||
struct delete_segmented_view {
|
||||
typedef SegmentedView<DataType , Arg1Type , Arg2Type, Arg3Type> view_type;
|
||||
typedef typename view_type::execution_space execution_space;
|
||||
|
||||
view_type view_;
|
||||
delete_segmented_view(view_type view):view_(view) {
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (int i) const {
|
||||
delete [] view_.get_segment(i).ptr_on_device();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@ -241,9 +241,9 @@ public:
|
||||
typedef UnorderedMap<const_key_type,value_type,execution_space,hasher_type,equal_to_type> modifiable_map_type;
|
||||
typedef UnorderedMap<const_key_type,const_value_type,execution_space,hasher_type,equal_to_type> const_map_type;
|
||||
|
||||
static const bool is_set = Impl::is_same<void,value_type>::value;
|
||||
static const bool has_const_key = Impl::is_same<const_key_type,declared_key_type>::value;
|
||||
static const bool has_const_value = is_set || Impl::is_same<const_value_type,declared_value_type>::value;
|
||||
static const bool is_set = std::is_same<void,value_type>::value;
|
||||
static const bool has_const_key = std::is_same<const_key_type,declared_key_type>::value;
|
||||
static const bool has_const_value = is_set || std::is_same<const_value_type,declared_value_type>::value;
|
||||
|
||||
static const bool is_insertable_map = !has_const_key && (is_set || !has_const_value);
|
||||
static const bool is_modifiable_map = has_const_key && !has_const_value;
|
||||
@ -735,8 +735,8 @@ public:
|
||||
}
|
||||
|
||||
template <typename SKey, typename SValue, typename SDevice>
|
||||
typename Impl::enable_if< Impl::is_same< typename Impl::remove_const<SKey>::type, key_type>::value &&
|
||||
Impl::is_same< typename Impl::remove_const<SValue>::type, value_type>::value
|
||||
typename Impl::enable_if< std::is_same< typename Impl::remove_const<SKey>::type, key_type>::value &&
|
||||
std::is_same< typename Impl::remove_const<SValue>::type, value_type>::value
|
||||
>::type
|
||||
create_copy_view( UnorderedMap<SKey, SValue, SDevice, Hasher,EqualTo> const& src)
|
||||
{
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
|
||||
|
||||
SET(SOURCES
|
||||
|
||||
@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests
|
||||
default: build_all
|
||||
echo "End Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
|
||||
else
|
||||
CXX = g++
|
||||
endif
|
||||
|
||||
CXXFLAGS = -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
CXX = $(NVCC_WRAPPER)
|
||||
CXXFLAGS ?= -O3
|
||||
LINK = $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
else
|
||||
CXX ?= g++
|
||||
CXXFLAGS ?= -O3
|
||||
LINK ?= $(CXX)
|
||||
LDFLAGS ?= -lpthread
|
||||
endif
|
||||
|
||||
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests
|
||||
|
||||
TEST_TARGETS =
|
||||
|
||||
@ -59,11 +59,13 @@
|
||||
#include <TestVector.hpp>
|
||||
#include <TestDualView.hpp>
|
||||
#include <TestDynamicView.hpp>
|
||||
#include <TestSegmentedView.hpp>
|
||||
|
||||
#include <Kokkos_DynRankView.hpp>
|
||||
#include <TestDynViewAPI.hpp>
|
||||
|
||||
#include <Kokkos_ErrorReporter.hpp>
|
||||
#include <TestErrorReporter.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -133,11 +135,6 @@ void cuda_test_dualview_combinations(unsigned int size)
|
||||
test_dualview_combinations<int,Kokkos::Cuda>(size);
|
||||
}
|
||||
|
||||
void cuda_test_segmented_view(unsigned int size)
|
||||
{
|
||||
test_segmented_view<double,Kokkos::Cuda>(size);
|
||||
}
|
||||
|
||||
void cuda_test_bitset()
|
||||
{
|
||||
test_bitset<Kokkos::Cuda>();
|
||||
@ -184,11 +181,6 @@ void cuda_test_bitset()
|
||||
cuda_test_dualview_combinations(size); \
|
||||
}
|
||||
|
||||
#define CUDA_SEGMENTEDVIEW_TEST( size ) \
|
||||
TEST_F( cuda, segmentedview_##size##x) { \
|
||||
cuda_test_segmented_view(size); \
|
||||
}
|
||||
|
||||
CUDA_DUALVIEW_COMBINE_TEST( 10 )
|
||||
CUDA_VECTOR_COMBINE_TEST( 10 )
|
||||
CUDA_VECTOR_COMBINE_TEST( 3057 )
|
||||
@ -198,7 +190,6 @@ CUDA_INSERT_TEST(close, 100000, 90000, 100, 500)
|
||||
CUDA_INSERT_TEST(far, 100000, 90000, 100, 500)
|
||||
CUDA_DEEP_COPY( 10000, 1 )
|
||||
CUDA_FAILED_INSERT_TEST( 10000, 1000 )
|
||||
CUDA_SEGMENTEDVIEW_TEST( 200 )
|
||||
|
||||
|
||||
#undef CUDA_INSERT_TEST
|
||||
@ -207,7 +198,6 @@ CUDA_SEGMENTEDVIEW_TEST( 200 )
|
||||
#undef CUDA_DEEP_COPY
|
||||
#undef CUDA_VECTOR_COMBINE_TEST
|
||||
#undef CUDA_DUALVIEW_COMBINE_TEST
|
||||
#undef CUDA_SEGMENTEDVIEW_TEST
|
||||
|
||||
|
||||
TEST_F( cuda , dynamic_view )
|
||||
@ -221,6 +211,18 @@ TEST_F( cuda , dynamic_view )
|
||||
}
|
||||
|
||||
|
||||
#if defined(KOKKOS_CLASS_LAMBDA)
// Only compiled when KOKKOS_CLASS_LAMBDA is defined: runs the ErrorReporter
// test with the lambda-based driver on Kokkos::Cuda.
TEST_F(cuda, ErrorReporterViaLambda)
{
  typedef ErrorReporterDriverUseLambda<Kokkos::Cuda> driver_type;
  TestErrorReporter<driver_type>();
}
#endif
|
||||
|
||||
// Runs the ErrorReporter test with the functor-based driver on Kokkos::Cuda.
TEST_F(cuda, ErrorReporter)
{
  typedef ErrorReporterDriver<Kokkos::Cuda> driver_type;
  TestErrorReporter<driver_type>();
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* #ifdef KOKKOS_HAVE_CUDA */
|
||||
|
||||
@ -715,9 +715,9 @@ public:
|
||||
typedef Kokkos::Experimental::DynRankView< T, device, Kokkos::MemoryUnmanaged > dView0_unmanaged ;
|
||||
typedef typename dView0::host_mirror_space host_drv_space ;
|
||||
|
||||
typedef Kokkos::Experimental::View< T , device > View0 ;
|
||||
typedef Kokkos::Experimental::View< T* , device > View1 ;
|
||||
typedef Kokkos::Experimental::View< T******* , device > View7 ;
|
||||
typedef Kokkos::View< T , device > View0 ;
|
||||
typedef Kokkos::View< T* , device > View1 ;
|
||||
typedef Kokkos::View< T******* , device > View7 ;
|
||||
|
||||
typedef typename View0::host_mirror_space host_view_space ;
|
||||
|
||||
@ -1127,8 +1127,7 @@ public:
|
||||
// T v2 = hx(0,0) ; // Generates compile error as intended
|
||||
// hx(0,0) = v2 ; // Generates compile error as intended
|
||||
|
||||
/*
|
||||
#if ! KOKKOS_USING_EXP_VIEW
|
||||
#if 0 /* Asynchronous deep copies not implemented for dynamic rank view */
|
||||
// Testing with asynchronous deep copy with respect to device
|
||||
{
|
||||
size_t count = 0 ;
|
||||
@ -1193,7 +1192,7 @@ public:
|
||||
{ ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); }
|
||||
}}}}
|
||||
}
|
||||
#endif */ // #if ! KOKKOS_USING_EXP_VIEW
|
||||
#endif
|
||||
|
||||
// Testing with synchronous deep copy
|
||||
{
|
||||
|
||||
227
lib/kokkos/containers/unit_tests/TestErrorReporter.hpp
Normal file
227
lib/kokkos/containers/unit_tests/TestErrorReporter.hpp
Normal file
@ -0,0 +1,227 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_TEST_EXPERIMENTAL_ERROR_REPORTER_HPP
|
||||
#define KOKKOS_TEST_EXPERIMENTAL_ERROR_REPORTER_HPP
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <iostream>
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
namespace Test {
|
||||
|
||||
// Just save the data in the report.  Informative text goes in the operator<<(..).
|
||||
// Plain aggregate that stores three values of independent types.
// It declares no constructors, so it can be brace-initialised.
template <typename FirstType, typename SecondType, typename ThirdType>
struct ThreeValReport
{
  FirstType  m_data1;   // first stored value
  SecondType m_data2;   // second stored value
  ThirdType  m_data3;   // third stored value
};
|
||||
|
||||
template <typename DataType1, typename DataType2, typename DataType3>
|
||||
std::ostream &operator<<(std::ostream & os, const ThreeValReport<DataType1, DataType2, DataType3> &val)
|
||||
{
|
||||
return os << "{" << val.m_data1 << " " << val.m_data2 << " " << val.m_data3 << "}";
|
||||
}
|
||||
|
||||
template<typename ReportType>
|
||||
void checkReportersAndReportsAgree(const std::vector<int> &reporters,
|
||||
const std::vector<ReportType> &reports)
|
||||
{
|
||||
for (size_t i = 0; i < reports.size(); ++i) {
|
||||
EXPECT_EQ(1, reporters[i] % 2);
|
||||
EXPECT_EQ(reporters[i], reports[i].m_data1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename DeviceType>
|
||||
struct ErrorReporterDriverBase {
|
||||
|
||||
typedef ThreeValReport<int, int, double> report_type;
|
||||
typedef Kokkos::Experimental::ErrorReporter<report_type, DeviceType> error_reporter_type;
|
||||
error_reporter_type m_errorReporter;
|
||||
|
||||
ErrorReporterDriverBase(int reporter_capacity, int test_size)
|
||||
: m_errorReporter(reporter_capacity) { }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION bool error_condition(const int work_idx) const { return (work_idx % 2 != 0); }
|
||||
|
||||
void check_expectations(int reporter_capacity, int test_size)
|
||||
{
|
||||
int num_reported = m_errorReporter.getNumReports();
|
||||
int num_attempts = m_errorReporter.getNumReportAttempts();
|
||||
|
||||
int expected_num_reports = std::min(reporter_capacity, test_size / 2);
|
||||
EXPECT_EQ(expected_num_reports, num_reported);
|
||||
EXPECT_EQ(test_size / 2, num_attempts);
|
||||
|
||||
bool expect_full = (reporter_capacity <= (test_size / 2));
|
||||
bool reported_full = m_errorReporter.full();
|
||||
EXPECT_EQ(expect_full, reported_full);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ErrorReporterDriverType>
|
||||
void TestErrorReporter()
|
||||
{
|
||||
typedef ErrorReporterDriverType tester_type;
|
||||
std::vector<int> reporters;
|
||||
std::vector<typename tester_type::report_type> reports;
|
||||
|
||||
tester_type test1(100, 10);
|
||||
test1.m_errorReporter.getReports(reporters, reports);
|
||||
checkReportersAndReportsAgree(reporters, reports);
|
||||
|
||||
tester_type test2(10, 100);
|
||||
test2.m_errorReporter.getReports(reporters, reports);
|
||||
checkReportersAndReportsAgree(reporters, reports);
|
||||
|
||||
typename Kokkos::View<int*, typename ErrorReporterDriverType::execution_space >::HostMirror view_reporters;
|
||||
typename Kokkos::View<typename tester_type::report_type*, typename ErrorReporterDriverType::execution_space >::HostMirror
|
||||
view_reports;
|
||||
test2.m_errorReporter.getReports(view_reporters, view_reports);
|
||||
|
||||
int num_reports = view_reporters.extent(0);
|
||||
reporters.clear();
|
||||
reports.clear();
|
||||
reporters.reserve(num_reports);
|
||||
reports.reserve(num_reports);
|
||||
|
||||
for (int i = 0; i < num_reports; ++i) {
|
||||
reporters.push_back(view_reporters(i));
|
||||
reports.push_back(view_reports(i));
|
||||
}
|
||||
checkReportersAndReportsAgree(reporters, reports);
|
||||
|
||||
}
|
||||
|
||||
|
||||
template <typename DeviceType>
|
||||
struct ErrorReporterDriver : public ErrorReporterDriverBase<DeviceType>
|
||||
{
|
||||
typedef ErrorReporterDriverBase<DeviceType> driver_base;
|
||||
typedef typename driver_base::error_reporter_type::execution_space execution_space;
|
||||
|
||||
ErrorReporterDriver(int reporter_capacity, int test_size)
|
||||
: driver_base(reporter_capacity, test_size)
|
||||
{
|
||||
execute(reporter_capacity, test_size);
|
||||
|
||||
// Test that clear() and resize() work across memory spaces.
|
||||
if (reporter_capacity < test_size) {
|
||||
driver_base::m_errorReporter.clear();
|
||||
driver_base::m_errorReporter.resize(test_size);
|
||||
execute(test_size, test_size);
|
||||
}
|
||||
}
|
||||
|
||||
void execute(int reporter_capacity, int test_size)
|
||||
{
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0,test_size), *this);
|
||||
driver_base::check_expectations(reporter_capacity, test_size);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int work_idx) const
|
||||
{
|
||||
if (driver_base::error_condition(work_idx)) {
|
||||
double val = M_PI * static_cast<double>(work_idx);
|
||||
typename driver_base::report_type report = {work_idx, -2*work_idx, val};
|
||||
driver_base::m_errorReporter.add_report(work_idx, report);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(KOKKOS_CLASS_LAMBDA)
// Lambda-based driver, only available when KOKKOS_CLASS_LAMBDA is defined.
// The constructor launches a parallel_for over [0, test_size) whose lambda adds
// a report for every index flagged by error_condition(), then checks the
// reporter's counters.
template <typename DeviceType>
struct ErrorReporterDriverUseLambda : public ErrorReporterDriverBase<DeviceType>
{
  typedef ErrorReporterDriverBase<DeviceType> driver_base;
  typedef typename driver_base::error_reporter_type::execution_space execution_space;

  ErrorReporterDriverUseLambda(int reporter_capacity, int test_size)
    : driver_base(reporter_capacity, test_size)
  {
    Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, test_size),
                         KOKKOS_CLASS_LAMBDA (const int work_idx) {
      if (!driver_base::error_condition(work_idx)) {
        return;
      }
      // Report payload: {index, -2*index, M_PI*index}.
      const double val = M_PI * static_cast<double>(work_idx);
      typename driver_base::report_type report = {work_idx, -2*work_idx, val};
      driver_base::m_errorReporter.add_report(work_idx, report);
    });
    driver_base::check_expectations(reporter_capacity, test_size);
  }
};
#endif
|
||||
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
// Driver that fills the reporter from a plain `#pragma omp parallel for` loop
// instead of a Kokkos parallel dispatch.  Only compiled when
// KOKKOS_HAVE_OPENMP is defined.
struct ErrorReporterDriverNativeOpenMP : public ErrorReporterDriverBase<Kokkos::OpenMP>
{
  typedef ErrorReporterDriverBase<Kokkos::OpenMP> driver_base;
  typedef typename driver_base::error_reporter_type::execution_space execution_space;

  // Adds a report for every index in [0, test_size) flagged by
  // error_condition(), then checks the reporter's counters.
  ErrorReporterDriverNativeOpenMP(int reporter_capacity, int test_size)
    : driver_base(reporter_capacity, test_size)
  {
#pragma omp parallel for
    for (int work_idx = 0; work_idx < test_size; ++work_idx) {
      if (driver_base::error_condition(work_idx)) {
        // Report payload: {index, -2*index, M_PI*index}.
        const double val = M_PI * static_cast<double>(work_idx);
        typename driver_base::report_type report = {work_idx, -2*work_idx, val};
        driver_base::m_errorReporter.add_report(work_idx, report);
      }
    }
    driver_base::check_expectations(reporter_capacity, test_size);
  }
};
#endif
|
||||
|
||||
} // namespace Test
|
||||
#endif // #ifndef KOKKOS_TEST_EXPERIMENTAL_ERROR_REPORTER_HPP
|
||||
@ -56,12 +56,14 @@
|
||||
#include <TestVector.hpp>
|
||||
#include <TestDualView.hpp>
|
||||
#include <TestDynamicView.hpp>
|
||||
#include <TestSegmentedView.hpp>
|
||||
#include <TestComplex.hpp>
|
||||
|
||||
#include <Kokkos_DynRankView.hpp>
|
||||
#include <TestDynViewAPI.hpp>
|
||||
|
||||
#include <Kokkos_ErrorReporter.hpp>
|
||||
#include <TestErrorReporter.hpp>
|
||||
|
||||
#include <iomanip>
|
||||
|
||||
namespace Test {
|
||||
@ -143,11 +145,6 @@ TEST_F( openmp , staticcrsgraph )
|
||||
test_dualview_combinations<int,Kokkos::OpenMP>(size); \
|
||||
}
|
||||
|
||||
// Declares a test case named segmentedview_<n>x in the `openmp` fixture that
// calls test_segmented_view<double,Kokkos::OpenMP> with <n>.
#define OPENMP_SEGMENTEDVIEW_TEST( n )                   \
  TEST_F( openmp, segmentedview_##n##x )                 \
  {                                                      \
    test_segmented_view<double,Kokkos::OpenMP>( n );     \
  }
|
||||
|
||||
OPENMP_INSERT_TEST(close, 100000, 90000, 100, 500, true)
|
||||
OPENMP_INSERT_TEST(far, 100000, 90000, 100, 500, false)
|
||||
OPENMP_FAILED_INSERT_TEST( 10000, 1000 )
|
||||
@ -156,7 +153,6 @@ OPENMP_DEEP_COPY( 10000, 1 )
|
||||
OPENMP_VECTOR_COMBINE_TEST( 10 )
|
||||
OPENMP_VECTOR_COMBINE_TEST( 3057 )
|
||||
OPENMP_DUALVIEW_COMBINE_TEST( 10 )
|
||||
OPENMP_SEGMENTEDVIEW_TEST( 10000 )
|
||||
|
||||
#undef OPENMP_INSERT_TEST
|
||||
#undef OPENMP_FAILED_INSERT_TEST
|
||||
@ -164,7 +160,6 @@ OPENMP_SEGMENTEDVIEW_TEST( 10000 )
|
||||
#undef OPENMP_DEEP_COPY
|
||||
#undef OPENMP_VECTOR_COMBINE_TEST
|
||||
#undef OPENMP_DUALVIEW_COMBINE_TEST
|
||||
#undef OPENMP_SEGMENTEDVIEW_TEST
|
||||
#endif
|
||||
|
||||
|
||||
@ -178,5 +173,22 @@ TEST_F( openmp , dynamic_view )
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(KOKKOS_CLASS_LAMBDA)
// Only compiled when KOKKOS_CLASS_LAMBDA is defined: runs the ErrorReporter
// test with the lambda-based driver on Kokkos::OpenMP.
TEST_F(openmp, ErrorReporterViaLambda)
{
  typedef ErrorReporterDriverUseLambda<Kokkos::OpenMP> driver_type;
  TestErrorReporter<driver_type>();
}
#endif
|
||||
|
||||
// Runs the ErrorReporter test with the functor-based driver on Kokkos::OpenMP.
TEST_F(openmp, ErrorReporter)
{
  typedef ErrorReporterDriver<Kokkos::OpenMP> driver_type;
  TestErrorReporter<driver_type>();
}
|
||||
|
||||
// Runs the ErrorReporter test with the driver that uses a native OpenMP loop.
TEST_F(openmp, ErrorReporterNativeOpenMP)
{
  typedef ErrorReporterDriverNativeOpenMP driver_type;
  TestErrorReporter<driver_type>();
}
|
||||
|
||||
} // namespace Test
|
||||
|
||||
|
||||
@ -1,708 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP
|
||||
#define KOKKOS_TEST_SEGMENTEDVIEW_HPP
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if ! KOKKOS_USING_EXP_VIEW
|
||||
|
||||
#include <Kokkos_SegmentedView.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
namespace Test {
|
||||
|
||||
namespace Impl {
|
||||
|
||||
template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
|
||||
struct GrowTest;
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct GrowTest<ViewType , ExecutionSpace , 1> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
GrowTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
a.grow(team_member , team_idx+team_member.team_size());
|
||||
value += team_idx + team_member.team_rank();
|
||||
|
||||
if((a.dimension_0()>team_idx+team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+team_member.team_rank()))
|
||||
a(team_idx+team_member.team_rank()) = team_idx+team_member.team_rank();
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct GrowTest<ViewType , ExecutionSpace , 2> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
GrowTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
a.grow(team_member , team_idx+ team_member.team_size());
|
||||
|
||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||
value += team_idx + team_member.team_rank() + 13*k;
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++) {
|
||||
a(team_idx+ team_member.team_rank(),k) =
|
||||
team_idx+ team_member.team_rank() + 13*k;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct GrowTest<ViewType , ExecutionSpace , 3> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
GrowTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
a.grow(team_member , team_idx+ team_member.team_size());
|
||||
|
||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
||||
value += team_idx + team_member.team_rank() + 13*k + 3*l;
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||
a(team_idx+ team_member.team_rank(),k,l) =
|
||||
team_idx+ team_member.team_rank() + 13*k + 3*l;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct GrowTest<ViewType , ExecutionSpace , 4> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
GrowTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
a.grow(team_member , team_idx+ team_member.team_size());
|
||||
|
||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
||||
value += team_idx + team_member.team_rank() + 13*k + 3*l + 7*m;
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||
a(team_idx+ team_member.team_rank(),k,l,m) =
|
||||
team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct GrowTest<ViewType , ExecutionSpace , 5> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
GrowTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
a.grow(team_member , team_idx+ team_member.team_size());
|
||||
|
||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
||||
for( typename ExecutionSpace::size_type n=0;n<3;n++)
|
||||
value +=
|
||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n;
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||
a(team_idx+ team_member.team_rank(),k,l,m,n) =
|
||||
team_idx+ team_member.team_rank() + 13*k + 3*l + 7*m + 5*n;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct GrowTest<ViewType , ExecutionSpace , 6> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
GrowTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
a.grow(team_member , team_idx+ team_member.team_size());
|
||||
|
||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
||||
for( typename ExecutionSpace::size_type n=0;n<3;n++)
|
||||
for( typename ExecutionSpace::size_type o=0;o<2;o++)
|
||||
value +=
|
||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ;
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
||||
a(team_idx+ team_member.team_rank(),k,l,m,n,o) =
|
||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o ;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct GrowTest<ViewType , ExecutionSpace , 7> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
GrowTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
a.grow(team_member , team_idx+ team_member.team_size());
|
||||
|
||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
||||
for( typename ExecutionSpace::size_type n=0;n<3;n++)
|
||||
for( typename ExecutionSpace::size_type o=0;o<2;o++)
|
||||
for( typename ExecutionSpace::size_type p=0;p<4;p++)
|
||||
value +=
|
||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ;
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
||||
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
|
||||
a(team_idx+ team_member.team_rank(),k,l,m,n,o,p) =
|
||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p ;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct GrowTest<ViewType , ExecutionSpace , 8> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
GrowTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
a.grow(team_member , team_idx + team_member.team_size());
|
||||
|
||||
for( typename ExecutionSpace::size_type k=0;k<7;k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<3;l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<2;m++)
|
||||
for( typename ExecutionSpace::size_type n=0;n<3;n++)
|
||||
for( typename ExecutionSpace::size_type o=0;o<2;o++)
|
||||
for( typename ExecutionSpace::size_type p=0;p<4;p++)
|
||||
for( typename ExecutionSpace::size_type q=0;q<3;q++)
|
||||
value +=
|
||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q;
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
||||
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
|
||||
for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++)
|
||||
a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q) =
|
||||
team_idx + team_member.team_rank() + 13*k + 3*l + 7*m + 5*n + 2*o + 15*p + 17*q;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace, int Rank = ViewType::Rank>
|
||||
struct VerifyTest;
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct VerifyTest<ViewType , ExecutionSpace , 1> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
VerifyTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
value += a(team_idx+ team_member.team_rank());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct VerifyTest<ViewType , ExecutionSpace , 2> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
VerifyTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
value += a(team_idx+ team_member.team_rank(),k);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct VerifyTest<ViewType , ExecutionSpace , 3> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
VerifyTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||
value += a(team_idx+ team_member.team_rank(),k,l);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct VerifyTest<ViewType , ExecutionSpace , 4> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
VerifyTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||
value += a(team_idx+ team_member.team_rank(),k,l,m);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct VerifyTest<ViewType , ExecutionSpace , 5> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
VerifyTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||
value += a(team_idx+ team_member.team_rank(),k,l,m,n);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct VerifyTest<ViewType , ExecutionSpace , 6> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
VerifyTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
||||
value += a(team_idx+ team_member.team_rank(),k,l,m,n,o);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct VerifyTest<ViewType , ExecutionSpace , 7> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
VerifyTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
||||
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
|
||||
value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<class ViewType , class ExecutionSpace>
|
||||
struct VerifyTest<ViewType , ExecutionSpace , 8> {
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
typedef typename Policy::member_type team_type;
|
||||
typedef double value_type;
|
||||
|
||||
ViewType a;
|
||||
|
||||
VerifyTest(ViewType in):a(in) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (team_type team_member, double& value) const {
|
||||
unsigned int team_idx = team_member.league_rank() * team_member.team_size();
|
||||
|
||||
if((a.dimension_0()>team_idx+ team_member.team_rank()) &&
|
||||
(a.dimension(0)>team_idx+ team_member.team_rank())) {
|
||||
for( typename ExecutionSpace::size_type k=0;k<a.dimension_1();k++)
|
||||
for( typename ExecutionSpace::size_type l=0;l<a.dimension_2();l++)
|
||||
for( typename ExecutionSpace::size_type m=0;m<a.dimension_3();m++)
|
||||
for( typename ExecutionSpace::size_type n=0;n<a.dimension_4();n++)
|
||||
for( typename ExecutionSpace::size_type o=0;o<a.dimension_5();o++)
|
||||
for( typename ExecutionSpace::size_type p=0;p<a.dimension_6();p++)
|
||||
for( typename ExecutionSpace::size_type q=0;q<a.dimension_7();q++)
|
||||
value += a(team_idx+ team_member.team_rank(),k,l,m,n,o,p,q);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Scalar, class ExecutionSpace>
|
||||
struct test_segmented_view
|
||||
{
|
||||
typedef test_segmented_view<Scalar,ExecutionSpace> self_type;
|
||||
|
||||
typedef Scalar scalar_type;
|
||||
typedef ExecutionSpace execution_space;
|
||||
typedef Kokkos::TeamPolicy<execution_space> Policy;
|
||||
|
||||
double result;
|
||||
double reference;
|
||||
|
||||
template <class ViewType>
|
||||
void run_me(ViewType a, int max_length){
|
||||
const int team_size = Policy::team_size_max( GrowTest<ViewType,execution_space>(a) );
|
||||
const int nteams = max_length/team_size;
|
||||
|
||||
reference = 0;
|
||||
result = 0;
|
||||
|
||||
Kokkos::parallel_reduce(Policy(nteams,team_size),GrowTest<ViewType,execution_space>(a),reference);
|
||||
Kokkos::fence();
|
||||
Kokkos::parallel_reduce(Policy(nteams,team_size),VerifyTest<ViewType,execution_space>(a),result);
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
|
||||
test_segmented_view(unsigned int size,int rank)
|
||||
{
|
||||
reference = 0;
|
||||
result = 0;
|
||||
|
||||
const int dim_1 = 7;
|
||||
const int dim_2 = 3;
|
||||
const int dim_3 = 2;
|
||||
const int dim_4 = 3;
|
||||
const int dim_5 = 2;
|
||||
const int dim_6 = 4;
|
||||
//const int dim_7 = 3;
|
||||
|
||||
if(rank==1) {
|
||||
typedef Kokkos::Experimental::SegmentedView<Scalar*,Kokkos::LayoutLeft,ExecutionSpace> rank1_view;
|
||||
run_me< rank1_view >(rank1_view("Rank1",128,size), size);
|
||||
}
|
||||
if(rank==2) {
|
||||
typedef Kokkos::Experimental::SegmentedView<Scalar**,Kokkos::LayoutLeft,ExecutionSpace> rank2_view;
|
||||
run_me< rank2_view >(rank2_view("Rank2",128,size,dim_1), size);
|
||||
}
|
||||
if(rank==3) {
|
||||
typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2],Kokkos::LayoutRight,ExecutionSpace> rank3_view;
|
||||
run_me< rank3_view >(rank3_view("Rank3",128,size), size);
|
||||
}
|
||||
if(rank==4) {
|
||||
typedef Kokkos::Experimental::SegmentedView<Scalar****,Kokkos::LayoutRight,ExecutionSpace> rank4_view;
|
||||
run_me< rank4_view >(rank4_view("Rank4",128,size,dim_1,dim_2,dim_3), size);
|
||||
}
|
||||
if(rank==5) {
|
||||
typedef Kokkos::Experimental::SegmentedView<Scalar*[7][3][2][3],Kokkos::LayoutLeft,ExecutionSpace> rank5_view;
|
||||
run_me< rank5_view >(rank5_view("Rank5",128,size), size);
|
||||
}
|
||||
if(rank==6) {
|
||||
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2],Kokkos::LayoutRight,ExecutionSpace> rank6_view;
|
||||
run_me< rank6_view >(rank6_view("Rank6",128,size,dim_1,dim_2,dim_3,dim_4), size);
|
||||
}
|
||||
if(rank==7) {
|
||||
typedef Kokkos::Experimental::SegmentedView<Scalar*******,Kokkos::LayoutLeft,ExecutionSpace> rank7_view;
|
||||
run_me< rank7_view >(rank7_view("Rank7",128,size,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6), size);
|
||||
}
|
||||
if(rank==8) {
|
||||
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> rank8_view;
|
||||
run_me< rank8_view >(rank8_view("Rank8",128,size,dim_1,dim_2,dim_3,dim_4), size);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
} // namespace Impl
|
||||
|
||||
|
||||
|
||||
|
||||
template <typename Scalar, class ExecutionSpace>
|
||||
void test_segmented_view(unsigned int size)
|
||||
{
|
||||
{
|
||||
typedef Kokkos::Experimental::SegmentedView<Scalar*****[2][4][3],Kokkos::LayoutLeft,ExecutionSpace> view_type;
|
||||
view_type a("A",128,size,7,3,2,3);
|
||||
double reference;
|
||||
|
||||
Impl::GrowTest<view_type,ExecutionSpace> f(a);
|
||||
|
||||
const int team_size = Kokkos::TeamPolicy<ExecutionSpace>::team_size_max( f );
|
||||
const int nteams = (size+team_size-1)/team_size;
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size),f,reference);
|
||||
|
||||
size_t real_size = ((size+127)/128)*128;
|
||||
|
||||
ASSERT_EQ(real_size,a.dimension_0());
|
||||
ASSERT_EQ(7,a.dimension_1());
|
||||
ASSERT_EQ(3,a.dimension_2());
|
||||
ASSERT_EQ(2,a.dimension_3());
|
||||
ASSERT_EQ(3,a.dimension_4());
|
||||
ASSERT_EQ(2,a.dimension_5());
|
||||
ASSERT_EQ(4,a.dimension_6());
|
||||
ASSERT_EQ(3,a.dimension_7());
|
||||
ASSERT_EQ(real_size,a.dimension(0));
|
||||
ASSERT_EQ(7,a.dimension(1));
|
||||
ASSERT_EQ(3,a.dimension(2));
|
||||
ASSERT_EQ(2,a.dimension(3));
|
||||
ASSERT_EQ(3,a.dimension(4));
|
||||
ASSERT_EQ(2,a.dimension(5));
|
||||
ASSERT_EQ(4,a.dimension(6));
|
||||
ASSERT_EQ(3,a.dimension(7));
|
||||
ASSERT_EQ(8,a.Rank);
|
||||
}
|
||||
{
|
||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,1);
|
||||
ASSERT_EQ(test.reference,test.result);
|
||||
}
|
||||
{
|
||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,2);
|
||||
ASSERT_EQ(test.reference,test.result);
|
||||
}
|
||||
{
|
||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,3);
|
||||
ASSERT_EQ(test.reference,test.result);
|
||||
}
|
||||
{
|
||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,4);
|
||||
ASSERT_EQ(test.reference,test.result);
|
||||
}
|
||||
{
|
||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,5);
|
||||
ASSERT_EQ(test.reference,test.result);
|
||||
}
|
||||
{
|
||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,6);
|
||||
ASSERT_EQ(test.reference,test.result);
|
||||
}
|
||||
{
|
||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,7);
|
||||
ASSERT_EQ(test.reference,test.result);
|
||||
}
|
||||
{
|
||||
Impl::test_segmented_view<Scalar,ExecutionSpace> test(size,8);
|
||||
ASSERT_EQ(test.reference,test.result);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
} // namespace Test
|
||||
|
||||
#else
|
||||
|
||||
// No-op variant of test_segmented_view, compiled in the #else branch: it
// accepts the size argument and ignores it.
template <typename Scalar, class ExecutionSpace>
void test_segmented_view(unsigned int /*size*/)
{
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* #ifndef KOKKOS_TEST_SEGMENTEDVIEW_HPP */
|
||||
|
||||
@ -58,7 +58,6 @@
|
||||
#include <TestStaticCrsGraph.hpp>
|
||||
#include <TestVector.hpp>
|
||||
#include <TestDualView.hpp>
|
||||
#include <TestSegmentedView.hpp>
|
||||
#include <TestDynamicView.hpp>
|
||||
#include <TestComplex.hpp>
|
||||
|
||||
@ -67,6 +66,9 @@
|
||||
#include <Kokkos_DynRankView.hpp>
|
||||
#include <TestDynViewAPI.hpp>
|
||||
|
||||
#include <Kokkos_ErrorReporter.hpp>
|
||||
#include <TestErrorReporter.hpp>
|
||||
|
||||
namespace Test {
|
||||
|
||||
class serial : public ::testing::Test {
|
||||
@ -135,11 +137,6 @@ TEST_F( serial, bitset )
|
||||
test_dualview_combinations<int,Kokkos::Serial>(size); \
|
||||
}
|
||||
|
||||
// Defines the fixture test serial.segmentedview_<N>x, which calls
// test_segmented_view<double,Kokkos::Serial>(N).
#define SERIAL_SEGMENTEDVIEW_TEST( N )                \
  TEST_F( serial, segmentedview_##N##x) {             \
    test_segmented_view<double,Kokkos::Serial>(N);    \
  }
|
||||
|
||||
SERIAL_INSERT_TEST(close, 100000, 90000, 100, 500, true)
|
||||
SERIAL_INSERT_TEST(far, 100000, 90000, 100, 500, false)
|
||||
SERIAL_FAILED_INSERT_TEST( 10000, 1000 )
|
||||
@ -148,7 +145,6 @@ SERIAL_DEEP_COPY( 10000, 1 )
|
||||
SERIAL_VECTOR_COMBINE_TEST( 10 )
|
||||
SERIAL_VECTOR_COMBINE_TEST( 3057 )
|
||||
SERIAL_DUALVIEW_COMBINE_TEST( 10 )
|
||||
SERIAL_SEGMENTEDVIEW_TEST( 10000 )
|
||||
|
||||
#undef SERIAL_INSERT_TEST
|
||||
#undef SERIAL_FAILED_INSERT_TEST
|
||||
@ -156,7 +152,6 @@ SERIAL_SEGMENTEDVIEW_TEST( 10000 )
|
||||
#undef SERIAL_DEEP_COPY
|
||||
#undef SERIAL_VECTOR_COMBINE_TEST
|
||||
#undef SERIAL_DUALVIEW_COMBINE_TEST
|
||||
#undef SERIAL_SEGMENTEDVIEW_TEST
|
||||
|
||||
TEST_F( serial , dynamic_view )
|
||||
{
|
||||
@ -168,6 +163,19 @@ TEST_F( serial , dynamic_view )
|
||||
}
|
||||
}
|
||||
|
||||
// Runs TestErrorReporter with ErrorReporterDriverUseLambda on Kokkos::Serial.
// Compiled only when KOKKOS_CLASS_LAMBDA is defined.
#ifdef KOKKOS_CLASS_LAMBDA
TEST_F(serial, ErrorReporterViaLambda)
{
  typedef ErrorReporterDriverUseLambda<Kokkos::Serial> driver_type;
  TestErrorReporter<driver_type>();
}
#endif
|
||||
|
||||
// Runs TestErrorReporter with ErrorReporterDriver on Kokkos::Serial.
TEST_F(serial, ErrorReporter)
{
  typedef ErrorReporterDriver<Kokkos::Serial> driver_type;
  TestErrorReporter<driver_type>();
}
|
||||
|
||||
|
||||
} // namespace Test
|
||||
|
||||
#endif // KOKKOS_HAVE_SERIAL
|
||||
|
||||
@ -62,11 +62,13 @@
|
||||
#include <TestVector.hpp>
|
||||
#include <TestDualView.hpp>
|
||||
#include <TestDynamicView.hpp>
|
||||
#include <TestSegmentedView.hpp>
|
||||
|
||||
#include <Kokkos_DynRankView.hpp>
|
||||
#include <TestDynViewAPI.hpp>
|
||||
|
||||
#include <Kokkos_ErrorReporter.hpp>
|
||||
#include <TestErrorReporter.hpp>
|
||||
|
||||
namespace Test {
|
||||
|
||||
class threads : public ::testing::Test {
|
||||
@ -145,12 +147,6 @@ TEST_F( threads , staticcrsgraph )
|
||||
test_dualview_combinations<int,Kokkos::Threads>(size); \
|
||||
}
|
||||
|
||||
// Defines the fixture test threads.segmentedview_<N>x, which calls
// test_segmented_view<double,Kokkos::Threads>(N).
#define THREADS_SEGMENTEDVIEW_TEST( N )               \
  TEST_F( threads, segmentedview_##N##x) {            \
    test_segmented_view<double,Kokkos::Threads>(N);   \
  }
|
||||
|
||||
|
||||
THREADS_INSERT_TEST(far, 100000, 90000, 100, 500, false)
|
||||
THREADS_FAILED_INSERT_TEST( 10000, 1000 )
|
||||
THREADS_DEEP_COPY( 10000, 1 )
|
||||
@ -158,7 +154,6 @@ THREADS_DEEP_COPY( 10000, 1 )
|
||||
THREADS_VECTOR_COMBINE_TEST( 10 )
|
||||
THREADS_VECTOR_COMBINE_TEST( 3057 )
|
||||
THREADS_DUALVIEW_COMBINE_TEST( 10 )
|
||||
THREADS_SEGMENTEDVIEW_TEST( 10000 )
|
||||
|
||||
|
||||
#undef THREADS_INSERT_TEST
|
||||
@ -167,8 +162,6 @@ THREADS_SEGMENTEDVIEW_TEST( 10000 )
|
||||
#undef THREADS_DEEP_COPY
|
||||
#undef THREADS_VECTOR_COMBINE_TEST
|
||||
#undef THREADS_DUALVIEW_COMBINE_TEST
|
||||
#undef THREADS_SEGMENTEDVIEW_TEST
|
||||
|
||||
|
||||
|
||||
TEST_F( threads , dynamic_view )
|
||||
@ -181,6 +174,19 @@ TEST_F( threads , dynamic_view )
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Runs TestErrorReporter with ErrorReporterDriverUseLambda on Kokkos::Threads.
// Compiled only when KOKKOS_CLASS_LAMBDA is defined.
#ifdef KOKKOS_CLASS_LAMBDA
TEST_F(threads, ErrorReporterViaLambda)
{
  typedef ErrorReporterDriverUseLambda<Kokkos::Threads> driver_type;
  TestErrorReporter<driver_type>();
}
#endif
|
||||
|
||||
// Runs TestErrorReporter with ErrorReporterDriver on Kokkos::Threads.
TEST_F(threads, ErrorReporter)
{
  typedef ErrorReporterDriver<Kokkos::Threads> driver_type;
  TestErrorReporter<driver_type>();
}
|
||||
|
||||
} // namespace Test
|
||||
|
||||
|
||||
|
||||
@ -2,3 +2,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
|
||||
LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREAD DLlib
|
||||
TEST_OPTIONAL_TPLS CUSPARSE
|
||||
)
|
||||
|
||||
TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib)
|
||||
@ -45,6 +45,16 @@
|
||||
#define KOKKOS_ENABLE_PROFILING 0
|
||||
#endif
|
||||
|
||||
#cmakedefine KOKKOS_HAVE_CUDA_RDC
|
||||
#ifdef KOKKOS_HAVE_CUDA_RDC
|
||||
#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1
|
||||
#endif
|
||||
|
||||
#cmakedefine KOKKOS_HAVE_CUDA_LAMBDA
|
||||
#ifdef KOKKOS_HAVE_CUDA_LAMBDA
|
||||
#define KOKKOS_CUDA_USE_LAMBDA 1
|
||||
#endif
|
||||
|
||||
// Don't forbid users from defining this macro on the command line,
|
||||
// but still make sure that CMake logic can control its definition.
|
||||
#if ! defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
|
||||
# Add the current binary directory to the include path. The variable name was
# previously misspelled (BINRARY), which expanded to an empty string.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
SET(SOURCES
|
||||
PerfTestMain.cpp
|
||||
@ -19,7 +19,7 @@ TRIBITS_ADD_EXECUTABLE(
|
||||
TESTONLYLIBS kokkos_gtest
|
||||
)
|
||||
|
||||
TRIBITS_ADD_EXECUTABLE_AND_TEST(
|
||||
TRIBITS_ADD_TEST(
|
||||
PerfTest
|
||||
NAME PerfTestExec
|
||||
COMM serial mpi
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user