Compare commits

...

79 Commits

Author SHA1 Message Date
49e83b4348 patch 21Feb17 sync with GHub 2017-02-21 16:07:26 -07:00
6e89ccd522 Merge pull request #385 from akohlmey/collected-small-bugfixes
collected small bugfixes and updates
2017-02-21 15:59:06 -07:00
53f3df5bfc Merge pull request #384 from lammps/another_neigh_refactor
more neighbor list changes, some new options
2017-02-21 15:57:23 -07:00
3dbbea342a remove a debug print line 2017-02-21 15:57:03 -07:00
b70c670aac Merge pull request #383 from stanmoore1/rshan_class2_kk
Kokkos version of class2 bond, angle, dihedral, and improper from Ray Shan
2017-02-21 15:52:10 -07:00
1d17cae407 Merge pull request #382 from timattox/master_kokkos_neigh_bugfix
neighbor_kokkos.cpp: Don't call grow() on neighbor lists that are copies
2017-02-21 15:51:35 -07:00
429264a12b Merge pull request #380 from hheenen/core_shell_documentation
updated documentation and examples for coreshell
2017-02-21 15:50:53 -07:00
d001a09345 Merge pull request #379 from ndtrung81/pppm-gpu-compute-group-group
Fixed bugs with pppm/gpu when used with compute group/group
2017-02-21 15:50:28 -07:00
cb9d42da08 Merge pull request #378 from timattox/USER-DPD_ssa_update
USER-DPD: performance optimizations to ssa_update() in fix_shardlow
2017-02-21 15:50:07 -07:00
7185ec92b3 Merge pull request #377 from stanmoore1/kokkos_update
Kokkos library update
2017-02-21 15:49:50 -07:00
1cd4c48ccc new SNAP potential for W 2017-02-21 15:49:21 -07:00
a88136c3f5 correct a logic bug in fix wall/gran/region 2017-02-17 17:58:24 -05:00
ce20c7ffe9 remove debug code 2017-02-17 12:42:07 -05:00
4a80df3a99 more neighbor list changes, some new options 2017-02-15 16:45:33 -07:00
5f93fad012 Add copymode protection to class2 styles 2017-02-15 13:56:16 -07:00
ccaec315db Updating docs for Kokkos class2 2017-02-15 13:00:33 -07:00
c6c1852b3b Fix a few issues with Kokkos class2 files 2017-02-15 12:44:54 -07:00
69a8e19dc5 Add files from Ray Shan for Kokkos version of class2 angle, bond, etc. 2017-02-15 12:29:52 -07:00
928947dcea neighbor_kokkos.cpp: Don't call grow() on neighbor lists that are copies.
This corresponds to a bugfix from commit 9161bd98 on neighbor.cpp
2017-02-15 11:49:34 -05:00
48070011d9 update names in example, too 2017-02-14 07:42:36 -05:00
0fb8dacc00 one more Finchham to Fincham change 2017-02-14 07:41:05 -05:00
6b923476b9 updated documentation and examples for coreshell 2017-02-14 13:14:22 +01:00
20806dd86a Fixed bugs with pppm/gpu when used with compute group/group 2017-02-14 00:26:55 -06:00
90e5ae965d Add missing flags to Kokkos Makefile 2017-02-13 11:19:46 -07:00
15008c9d18 USER-DPD: performance optimizations to ssa_update() in fix_shardlow
Overall improvements range from 2% to 18% on our benchmarks
1) Newton has to be turned on for SSA, so remove those conditionals
2) Rework the math in ssa_update() to eliminate many ops and temporaries
3) Split ssa_update() into two versions, based on DPD vs. DPDE
4) Reorder code in ssa_update_*() to reduce register pressure
2017-02-13 13:11:19 -05:00
33af7ab248 Remove merge line 2017-02-13 10:59:22 -07:00
8f9b2aca06 Removing unused files in Kokkos lib 2017-02-13 10:53:51 -07:00
383da816c2 Updating Kokkos lib 2017-02-13 10:50:34 -07:00
cb982f2f28 sync 13Feb17 patch back to GH 2017-02-13 09:05:03 -07:00
4843296d4e Merge pull request #372 from akohlmey/fft-cleanup
simplify FFT3d code by removing support for outdated FFT libraries
2017-02-13 08:53:13 -07:00
2bdda8f6c0 patch 12Feb17 - change int to tagint for compute group/group 2017-02-13 08:40:54 -07:00
0068ef5616 added molecule option to compute group/group command 2017-02-10 09:25:32 -07:00
02b0e6cc55 Merge pull request #375 from akohlmey/small-updates-and-fixes
Small updates and fixes
2017-02-10 09:23:51 -07:00
fbb24c2406 Merge pull request #374 from agiliopadua/master
Updated polarizer.py in USER-DRUDE to use coul/long/cs
2017-02-10 09:22:22 -07:00
a5f830c40c fix typo
(cherry picked from commit 6410797697)
2017-02-08 14:33:45 -05:00
8c074a363a Merge branch 'master' into small-updates-and-fixes 2017-02-08 14:32:44 -05:00
27aca14094 Updated polarizer.py to use coul/long/cs 2017-02-04 15:02:08 +01:00
191453e1c7 Merge branch 'master' into fft-cleanup 2017-02-03 16:53:10 -05:00
207adc3968 Merge pull request #373 from stanmoore1/kk_more_bugfixes
Fixing Kokkos per-atom energy/virial issues
2017-02-03 14:45:31 -07:00
84c517159d Merge pull request #368 from Pakketeretet2/kokkos_morse
Kokkos morse
2017-02-03 14:45:07 -07:00
6ca377436f Merge pull request #366 from rbberger/kokkos_lammps_bigbig_fix
Fix data type of molecule array in npair_kokkos.h
2017-02-03 14:43:33 -07:00
dc34a32602 Merge pull request #362 from ibaned/warnings2
fix Kokkos+kspace warnings
2017-02-03 14:43:12 -07:00
067119f6c6 Adding missing friend statement to pair_lj_class2_coul_cut_kokkos 2017-02-02 15:21:30 -07:00
1834a5e46c Fixing more Kokkos per-atom and fdotr issues 2017-02-02 15:21:21 -07:00
6a4918b39a Fixing typo in pair_buck_coul_cut_kokkos 2017-02-02 15:21:05 -07:00
5da0d39392 Fixing fdotr in pair_buck_coul_cut_kokkos 2017-02-02 13:35:51 -07:00
6f92429602 Fixing per-atom ev issue 2017-02-02 13:34:27 -07:00
38e0e4bb69 Add missing typedef in Kokkos pair styles 2017-02-02 13:24:05 -07:00
daf9f95381 Fixing Kokkos per-atom e/v issue 2017-02-02 13:09:52 -07:00
6595fde0a1 explain in more detail the handling of error checking for numerical inputs 2017-02-02 11:58:12 -05:00
6bcec9c61d Merge pull request #2 from stanmoore1/kk_tag_bugfixes
Fixing tagint and imageint issues in Kokkos package
2017-02-02 08:57:21 -05:00
9d1991bf84 remove support for obsolete legacy FFT libraries and point -DFFT_FFTW to FFTW3 2017-02-02 08:10:23 -05:00
0a87b7443a Updated contributing authors and docs 2017-02-02 13:42:47 +01:00
7ee45ec5f3 Fixing tagint and imageint issues in Kokkos package 2017-02-01 11:52:27 -07:00
d4c9e2500b Ported Morse to KOKKOS 2017-02-01 17:45:21 +01:00
6232073d3b Removed traces of pair morse/kk 2017-02-01 17:39:37 +01:00
ed59193d13 Removed traces of pair morse/kk 2017-02-01 17:39:06 +01:00
67bed8e853 Merge pull request #1 from akohlmey/tagint-issue
Fix additional tagint issue in fix qeq/reax/kk
2017-01-31 18:34:35 -05:00
bcb1d94b9a silence compiler warning about dead code 2017-01-31 18:28:04 -05:00
fbe30b5683 correct issue with compiling for -DLAMMPS_BIGBIG in fix qeq/reax/kk 2017-01-31 18:13:44 -05:00
9ef55fedf7 Merge branch 'kokkos_lammps_bigbig_fix' of https://github.com/rbberger/lammps into tagint-issue 2017-01-31 17:23:51 -05:00
997142a4c1 Merge pull request #364 from stanmoore1/kk_triclinic_neighlist
Add triclinic neighbor list support to Kokkos
2017-01-30 07:27:02 -07:00
033b07fdb7 Merge pull request #363 from ibaned/obey-datamask
Fix GPU sync bugs
2017-01-30 07:26:49 -07:00
51a0b6b445 Fix data type of molecule array in npair_kokkos.h
This showed up when trying to compile with -DLAMMPS_BIGBIG.
Fixes issue #365
2017-01-28 07:49:08 -05:00
59f4a77dd5 Whitespace change to npair_kokkos 2017-01-27 15:17:39 -07:00
579cc6d7aa More tweaks to npair_kokkos for triclinic 2017-01-27 15:13:37 -07:00
5afd3e995b Adding support to npair_kokkos for triclinic-newton-on neighborlists 2017-01-27 14:18:01 -07:00
2a6f5e651c more preference of datamask over custom sync
see commit 09fc8b0 for details on why
2017-01-27 09:35:55 -07:00
09fc8b0bd7 kspace & dihedral can't do their own sync/modify
because the verlet_kokkos system has
a "clever" optimization which will
alter the datamasks before calling sync/modify,
so the datamask framework must be
strictly obeyed for GPU correctness.
(the optimization is to concurrently
compute forces on the host and GPU,
and add them up at the end of an iteration.
calling your own sync will overwrite
the partial GPU forces with the
partial host forces).
2017-01-27 08:39:55 -07:00
e5d0bde783 pppm_kokkos: remove useless statement 2017-01-27 08:35:37 -07:00
9daf7fb650 pppm_kokkos: don't shadow member variables 2017-01-27 08:35:37 -07:00
b5d622c6a3 pppm_kokkos: remove unused variables 2017-01-27 08:35:37 -07:00
2023fa28e0 consistent #ifdefs for fft3d variable (2)
this variable is only used when FFTW3
is enabled, so its declaration and
initialization should be protected
under the same conditions to avoid
compiler warnings
2017-01-27 08:35:37 -07:00
5b29515849 fft3d: use C++ loop declarations
the variable (offset) is only
used in a subset of numerous
scenarios with #ifdef, it seems
better just to have each loop
declare it as needed.
(avoids compiler warnings)
2017-01-27 08:35:37 -07:00
5b18421dd2 fft3d : remove unused variables 2017-01-27 08:35:37 -07:00
cf95ea0709 fft3d: only declare variables when used
avoids compiler warnings
2017-01-27 08:35:36 -07:00
6a74a81da0 consistent #ifdefs for fft3d variable
this variable is only used when FFTW3
is enabled, so its declaration and
initialization should be protected
under the same conditions to avoid
compiler warnings
2017-01-27 08:35:36 -07:00
f0a4ed615d add missing KOKKOS_INLINE_FUNCTION for params 2017-01-27 08:35:36 -07:00
cfe818a175 remove unused variables from fix_cmap 2017-01-27 08:35:36 -07:00
431 changed files with 22774 additions and 33342 deletions

View File

@ -1,7 +1,7 @@
<!-- HTML_ONLY -->
<HEAD>
<TITLE>LAMMPS Users Manual</TITLE>
<META NAME="docnumber" CONTENT="26 Jan 2017 version">
<META NAME="docnumber" CONTENT="21 Feb 2017 version">
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
</HEAD>
@ -21,7 +21,7 @@
<H1></H1>
LAMMPS Documentation :c,h3
26 Jan 2017 version :c,h4
21 Feb 2017 version :c,h4
Version info: :h4

View File

@ -969,7 +969,7 @@ KOKKOS, o = USER-OMP, t = OPT.
"lubricateU/poly"_pair_lubricateU.html,
"meam"_pair_meam.html,
"mie/cut (o)"_pair_mie.html,
"morse (got)"_pair_morse.html,
"morse (gkot)"_pair_morse.html,
"nb3b/harmonic (o)"_pair_nb3b_harmonic.html,
"nm/cut (o)"_pair_nm.html,
"nm/cut/coul/cut (o)"_pair_nm.html,
@ -1076,7 +1076,7 @@ KOKKOS, o = USER-OMP, t = OPT.
"none"_bond_none.html,
"zero"_bond_zero.html,
"hybrid"_bond_hybrid.html,
"class2 (o)"_bond_class2.html,
"class2 (ko)"_bond_class2.html,
"fene (iko)"_bond_fene.html,
"fene/expand (o)"_bond_fene_expand.html,
"harmonic (ko)"_bond_harmonic.html,
@ -1109,7 +1109,7 @@ USER-OMP, t = OPT.
"zero"_angle_zero.html,
"hybrid"_angle_hybrid.html,
"charmm (ko)"_angle_charmm.html,
"class2 (o)"_angle_class2.html,
"class2 (ko)"_angle_class2.html,
"cosine (o)"_angle_cosine.html,
"cosine/delta (o)"_angle_cosine_delta.html,
"cosine/periodic (o)"_angle_cosine_periodic.html,
@ -1145,7 +1145,7 @@ USER-OMP, t = OPT.
"zero"_dihedral_zero.html,
"hybrid"_dihedral_hybrid.html,
"charmm (ko)"_dihedral_charmm.html,
"class2 (o)"_dihedral_class2.html,
"class2 (ko)"_dihedral_class2.html,
"harmonic (io)"_dihedral_harmonic.html,
"helix (o)"_dihedral_helix.html,
"multi/harmonic (o)"_dihedral_multi_harmonic.html,
@ -1177,7 +1177,7 @@ USER-OMP, t = OPT.
"none"_improper_none.html,
"zero"_improper_zero.html,
"hybrid"_improper_hybrid.html,
"class2 (o)"_improper_class2.html,
"class2 (ko)"_improper_class2.html,
"cvff (io)"_improper_cvff.html,
"harmonic (ko)"_improper_harmonic.html,
"umbrella (o)"_improper_umbrella.html :tb(c=4,ea=c)

View File

@ -22,7 +22,7 @@ either conceptually, or as printed out by the program.
12.1 Common problems :link(err_1),h4
If two LAMMPS runs do not produce the same answer on different
If two LAMMPS runs do not produce the exact same answer on different
machines or different numbers of processors, this is typically not a
bug. In theory you should get identical answers on any number of
processors and on any machine. In practice, numerical round-off can
@ -80,12 +80,24 @@ order. If you mess this up, LAMMPS will often flag the error, but it
may also simply read a bogus argument and assign a value that is
valid, but not what you wanted. E.g. trying to read the string "abc"
as an integer value of 0. Careful reading of the associated doc page
for the command should allow you to fix these problems. Note that
some commands allow for variables to be specified in place of numeric
constants so that the value can be evaluated and change over the
course of a run. This is typically done with the syntax {v_name} for
a parameter, where name is the name of the variable. This is only
allowed if the command documentation says it is.
for the command should allow you to fix these problems. In most cases,
where LAMMPS expects to read a number, either integer or floating point,
it performs a stringent test on whether the provided input actually
is an integer or floating-point number, respectively, and rejects the
input with an error message (for instance, when an integer is required,
but a floating-point number 1.0 is provided):
ERROR: Expected integer parameter in input script or data file :pre
Some commands allow for using variable references in place of numeric
constants so that the value can be evaluated and may change over the
course of a run. This is typically done with the syntax {v_name} for a
parameter, where name is the name of the variable. On the other hand,
immediate variable expansion with the syntax ${name} is performed while
reading the input and before parsing commands.
NOTE: Using a variable reference (i.e. {v_name}) is only allowed if
the documentation of the corresponding command explicitly says it is.
Generally, LAMMPS will print a message to the screen and logfile and
exit gracefully when it encounters a fatal error. Sometimes it will

View File

@ -2573,7 +2573,7 @@ well.
6.26 Adiabatic core/shell model :link(howto_26),h4
The adiabatic core-shell model by "Mitchell and
Finchham"_#MitchellFinchham is a simple method for adding
Fincham"_#MitchellFincham is a simple method for adding
polarizability to a system. In order to mimic the electron shell of
an ion, a satellite particle is attached to it. This way the ions are
split into a core and a shell where the latter is meant to react to
@ -2667,13 +2667,16 @@ bond_coeff 1 63.014 0.0
bond_coeff 2 25.724 0.0 :pre
When running dynamics with the adiabatic core/shell model, the
following issues should be considered. Since the relative motion of
the core and shell particles corresponds to the polarization, typical
thermostats can alter the polarization behaviour, meaning the shell
will not react freely to its electrostatic environment. This is
critical during the equilibration of the system. Therefore
it's typically desirable to decouple the relative motion of the
core/shell pair, which is an imaginary degree of freedom, from the
following issues should be considered. The relative motion of
the core and shell particles corresponds to the polarization,
hereby an instantaneous relaxation of the shells is approximated
and a fast core/shell spring frequency ensures a nearly constant
internal kinetic energy during the simulation.
Thermostats can alter this polarization behaviour, by scaling the
internal kinetic energy, meaning the shell will not react freely to
its electrostatic environment.
Therefore it is typically desirable to decouple the relative motion of
the core/shell pair, which is an imaginary degree of freedom, from the
real physical system. To do that, the "compute
temp/cs"_compute_temp_cs.html command can be used, in conjunction with
any of the thermostat fixes, such as "fix nvt"_fix_nh.html or "fix
@ -2704,6 +2707,22 @@ fix thermostatequ all nve # integrator as needed f
fix_modify thermoberendsen temp CSequ
thermo_modify temp CSequ # output of center-of-mass derived temperature :pre
The pressure for the core/shell system is computed via the regular
LAMMPS convention by "treating the cores and shells as individual
particles"_#MitchellFincham2. For the thermo output of the pressure
as well as for the application of a barostat, it is necessary to
use an additional "pressure"_compute_pressure compute based on the
default "temperature"_compute_temp and specifying it as a second
argument in "fix modify"_fix_modify.html and
"thermo_modify"_thermo_modify.html resulting in:
(...)
compute CSequ all temp/cs cores shells
compute thermo_press_lmp all pressure thermo_temp # pressure for individual particles
thermo_modify temp CSequ press thermo_press_lmp # modify thermo to regular pressure
fix press_bar all npt temp 300 300 0.04 iso 0 0 0.4
fix_modify press_bar temp CSequ press thermo_press_lmp # pressure modification for correct kinetic scalar :pre
If "compute temp/cs"_compute_temp_cs.html is used, the decoupled
relative motion of the core and the shell should in theory be
stable. However numerical fluctuation can introduce a small
@ -2724,24 +2743,18 @@ temp/cs"_compute_temp_cs.html command to the {temp} keyword of the
velocity all create 1427 134 bias yes temp CSequ
velocity all scale 1427 temp CSequ :pre
It is important to note that the polarizability of the core/shell
pairs is based on their relative motion. Therefore the choice of
spring force and mass ratio need to ensure much faster relative motion
of the 2 atoms within the core/shell pair than their center-of-mass
velocity. This allow the shells to effectively react instantaneously
to the electrostatic environment. This fast movement also limits the
timestep size that can be used.
To maintain the correct polarizability of the core/shell pairs, the
kinetic energy of the internal motion shall remain nearly constant.
Therefore the choice of spring force and mass ratio needs to ensure
much faster relative motion of the 2 atoms within the core/shell pair
than their center-of-mass velocity. This allows the shells to
effectively react instantaneously to the electrostatic environment and
limits energy transfer to or from the core/shell oscillators.
This fast movement also dictates the timestep that can be used.
The primary literature of the adiabatic core/shell model suggests that
the fast relative motion of the core/shell pairs only allows negligible
energy transfer to the environment. Therefore it is not intended to
decouple the core/shell degree of freedom from the physical system
during production runs. In other words, the "compute
temp/cs"_compute_temp_cs.html command should not be used during
production runs and is only required during equilibration. This way one
is consistent with literature (based on the code packages DL_POLY or
GULP for instance).
energy transfer to the environment.
The mentioned energy transfer will typically lead to a small drift
in total energy over time. This internal energy can be monitored
using the "compute chunk/atom"_compute_chunk_atom.html and "compute
@ -2761,14 +2774,20 @@ command, to use as input to the "compute
chunk/atom"_compute_chunk_atom.html command to define the core/shell
pairs as chunks.
For example,
For example if core/shell pairs are the only molecules:
read_data NaCl_CS_x0.1_prop.data
compute prop all property/atom molecule
compute cs_chunk all chunk/atom c_prop
compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0 # note the chosen degrees of freedom for the core/shell pairs
fix ave_chunk all ave/time 10 1 10 c_cstherm file chunk.dump mode vector :pre
For example if core/shell pairs and other molecules are present:
fix csinfo all property/atom i_CSID # property/atom command
read_data NaCl_CS_x0.1_prop.data fix csinfo NULL CS-Info # atom property added in the data-file
compute prop all property/atom i_CSID
compute cs_chunk all chunk/atom c_prop
compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0 # note the chosen degrees of freedom for the core/shell pairs
fix ave_chunk all ave/time 10 1 10 c_cstherm file chunk.dump mode vector :pre
(...) :pre
The additional section in the data file would be formatted like this:
@ -2890,9 +2909,13 @@ Phys, 79, 926 (1983).
:link(Shinoda)
[(Shinoda)] Shinoda, Shiga, and Mikami, Phys Rev B, 69, 134103 (2004).
:link(MitchellFinchham)
[(Mitchell and Finchham)] Mitchell, Finchham, J Phys Condensed Matter,
:link(MitchellFincham)
[(Mitchell and Fincham)] Mitchell, Fincham, J Phys Condensed Matter,
5, 1031-1038 (1993).
:link(MitchellFincham2)
[(Fincham)] Fincham, Mackrodt and Mitchell, J Phys Condensed Matter,
6, 393-404 (1994).
:link(howto-Lamoureux)
[(Lamoureux and Roux)] G. Lamoureux, B. Roux, J. Chem. Phys 119, 3025 (2003)

View File

@ -413,7 +413,7 @@ uses (for performing 1d FFTs) when running the particle-particle
particle-mesh (PPPM) option for long-range Coulombics via the
"kspace_style"_kspace_style.html command.
LAMMPS supports various open-source or vendor-supplied FFT libraries
LAMMPS supports common open-source or vendor-supplied FFT libraries
for this purpose. If you leave these 3 variables blank, LAMMPS will
use the open-source "KISS FFT library"_http://kissfft.sf.net, which is
included in the LAMMPS distribution. This library is portable to all
@ -423,10 +423,9 @@ package in your build, you can also leave the 3 variables blank.
Otherwise, select which kinds of FFTs to use as part of the FFT_INC
setting by a switch of the form -DFFT_XXX. Recommended values for XXX
are: MKL, SCSL, FFTW2, and FFTW3. Legacy options are: INTEL, SGI,
ACML, and T3E. For backward compatability, using -DFFT_FFTW will use
the FFTW2 library. Using -DFFT_NONE will use the KISS library
described above.
are: MKL or FFTW3. FFTW2 and NONE are supported as legacy options.
Selecting -DFFT_FFTW will use the FFTW3 library and -DFFT_NONE will
use the KISS library described above.
You may also need to set the FFT_INC, FFT_PATH, and FFT_LIB variables,
so the compiler and linker can find the needed FFT header and library

View File

@ -8,6 +8,7 @@
angle_style class2 command :h3
angle_style class2/omp command :h3
angle_style class2/kk command :h3
[Syntax:]

View File

@ -8,6 +8,7 @@
bond_style class2 command :h3
bond_style class2/omp command :h3
bond_style class2/kk command :h3
[Syntax:]

View File

@ -16,10 +16,11 @@ ID, group-ID are documented in "compute"_compute.html command :ulb,l
group/group = style name of this compute command :l
group2-ID = group ID of second (or same) group :l
zero or more keyword/value pairs may be appended :l
keyword = {pair} or {kspace} or {boundary} :l
keyword = {pair} or {kspace} or {boundary} or {molecule} :l
{pair} value = {yes} or {no}
{kspace} value = {yes} or {no}
{boundary} value = {yes} or {no} :pre
{boundary} value = {yes} or {no}
{molecule} value = {off} or {inter} or {intra} :pre
:ule
[Examples:]
@ -46,6 +47,13 @@ NOTE: The energies computed by the {pair} keyword do not include tail
corrections, even if they are enabled via the
"pair_modify"_pair_modify.html command.
If the {molecule} keyword is set to {inter} or {intra} then an
additional check is made based on the molecule IDs of the two atoms in
each pair before including their pairwise interaction energy and
force. For the {inter} setting, the two atoms must be in different
molecules. For the {intra} setting, the two atoms must be in the same
molecule.
If the {kspace} keyword is set to {yes}, which is not the default, and
if a "kspace_style"_kspace_style.html is defined, then the interaction
energy will include a Kspace component which is the long-range
@ -66,6 +74,10 @@ affect the force calculation and will be zero if one or both of the
groups are charge neutral. This energy correction term is the same as
that included in the regular Ewald and PPPM routines.
NOTE: The {molecule} setting only affects the group/group
contributions calculated by the {pair} keyword. It does not affect
the group/group contributions calculated by the {kspace} keyword.
This compute does not calculate any bond or angle or dihedral or
improper interactions between atoms in the two groups.
@ -78,6 +90,22 @@ work (FFTs, Ewald summation) as computing long-range forces for the
entire system. Thus it can be costly to invoke this compute too
frequently.
NOTE: If you have a bonded system, then the settings of
"special_bonds"_special_bonds.html command can remove pairwise
interactions between atoms in the same bond, angle, or dihedral. This
is the default setting for the "special_bonds"_special_bonds.html
command, and means those pairwise interactions do not appear in the
neighbor list. Because this compute uses a neighbor list, it also
means those pairs will not be included in the group/group interaction.
This does not apply when using long-range coulomb interactions
({coul/long}, {coul/msm}, {coul/wolf} or similar). One way to get
around this would be to set special_bond scaling factors to very tiny
numbers that are not exactly zero (e.g. 1.0e-50). Another workaround
is to write a dump file, and use the "rerun"_rerun.html command to
compute the group/group interactions for snapshots in the dump file.
The rerun script can use a "special_bonds"_special_bonds.html command
that includes all pairs in the neighbor list.
If you desire a breakdown of the interactions into a pairwise and
Kspace component, simply invoke the compute twice with the appropriate
yes/no settings for the {pair} and {kspace} keywords. This is no more
@ -119,7 +147,8 @@ The {ewald} and {pppm} styles do.
[Default:]
The option defaults are pair = yes, kspace = no, and boundary = yes.
The option defaults are pair = yes, kspace = no, boundary = yes,
molecule = off.
:line

View File

@ -10,21 +10,27 @@ compute rdf command :h3
[Syntax:]
compute ID group-ID rdf Nbin itype1 jtype1 itype2 jtype2 ... :pre
compute ID group-ID rdf Nbin itype1 jtype1 itype2 jtype2 ... keyword/value ... :pre
ID, group-ID are documented in "compute"_compute.html command
rdf = style name of this compute command
Nbin = number of RDF bins
itypeN = central atom type for Nth RDF histogram (see asterisk form below)
jtypeN = distribution atom type for Nth RDF histogram (see asterisk form below) :ul
ID, group-ID are documented in "compute"_compute.html command :ulb,l
rdf = style name of this compute command :l
Nbin = number of RDF bins :l
itypeN = central atom type for Nth RDF histogram (see asterisk form below) :l
jtypeN = distribution atom type for Nth RDF histogram (see asterisk form below) :l
zero or more keyword/value pairs may be appended :l
keyword = {cutoff} :l
{cutoff} value = Rcut
Rcut = cutoff distance for RDF computation (distance units) :pre
:ule
[Examples:]
compute 1 all rdf 100
compute 1 all rdf 100 1 1
compute 1 all rdf 100 * 3
compute 1 all rdf 100 * 3 cutoff 5.0
compute 1 fluid rdf 500 1 1 1 2 2 1 2 2
compute 1 fluid rdf 500 1*3 2 5 *10 :pre
compute 1 fluid rdf 500 1*3 2 5 *10 cutoff 3.5 :pre
[Description:]
@ -32,7 +38,8 @@ Define a computation that calculates the radial distribution function
(RDF), also called g(r), and the coordination number for a group of
particles. Both are calculated in histogram form by binning pairwise
distances into {Nbin} bins from 0.0 to the maximum force cutoff
defined by the "pair_style"_pair_style.html command. The bins are of
defined by the "pair_style"_pair_style.html command or the cutoff
distance {Rcut} specified via the {cutoff} keyword. The bins are of
uniform size in radial distance. Thus a single bin encompasses a thin
shell of distances in 3d and a thin ring of distances in 2d.
@ -41,17 +48,41 @@ NOTE: If you have a bonded system, then the settings of
interactions between atoms in the same bond, angle, or dihedral. This
is the default setting for the "special_bonds"_special_bonds.html
command, and means those pairwise interactions do not appear in the
neighbor list. Because this fix uses the neighbor list, it also means
neighbor list. Because this compute uses a neighbor list, it also means
those pairs will not be included in the RDF. This does not apply when
using long-range coulomb ({coul/long}, {coul/msm}, {coul/wolf} or
similar. One way to get around this would be to set special_bond
scaling factors to very tiny numbers that are not exactly zero
(e.g. 1.0e-50). Another workaround is to write a dump file, and use
the "rerun"_rerun.html command to compute the RDF for snapshots in the
dump file. The rerun script can use a
using long-range coulomb interactions ({coul/long}, {coul/msm},
{coul/wolf} or similar). One way to get around this would be to set
special_bond scaling factors to very tiny numbers that are not exactly
zero (e.g. 1.0e-50). Another workaround is to write a dump file, and
use the "rerun"_rerun.html command to compute the RDF for snapshots in
the dump file. The rerun script can use a
"special_bonds"_special_bonds.html command that includes all pairs in
the neighbor list.
By default the RDF is computed out to the maximum force cutoff defined
by the "pair_style"_pair_style.html command. If the {cutoff} keyword
is used, then the RDF is computed accurately out to the {Rcut} > 0.0
distance specified.
NOTE: Normally, you should only use the {cutoff} keyword if no pair
style is defined, e.g. the "rerun"_rerun.html command is being used to
post-process a dump file of snapshots. Or if you really want the RDF
for distances beyond the pair_style force cutoff and cannot easily
post-process a dump file to calculate it. This is because using the
{cutoff} keyword incurs extra computation and possibly communication,
which may slow down your simulation. If you specify a {Rcut} <= force
cutoff, you will force an additional neighbor list to be built at
every timestep this command is invoked (or every reneighboring
timestep, whichever is less frequent), which is inefficient. LAMMPS
will warn you if this is the case. If you specify a {Rcut} > force
cutoff, you must ensure ghost atom information out to {Rcut} + {skin}
is communicated, via the "comm_modify cutoff"_comm_modify.html
command, else the RDF computation cannot be performed, and LAMMPS will
give an error message. The {skin} value is what is specified with the
"neighbor"_neighbor.html command. In this case, you are forcing a
large neighbor list to be built just for the RDF computation, and
extra communication to be performed every timestep.
The {itypeN} and {jtypeN} arguments are optional. These arguments
must come in pairs. If no pairs are listed, then a single histogram
is computed for g(r) between all atom types. If one or more pairs are
@ -153,4 +184,6 @@ change from zero to one at the location of the spike in g(r).
"fix ave/time"_fix_ave_time.html
[Default:] none
[Default:]
The keyword defaults are cutoff = 0.0 (use the pairwise force cutoff).

View File

@ -8,6 +8,7 @@
dihedral_style class2 command :h3
dihedral_style class2/omp command :h3
dihedral_style class2/kk command :h3
[Syntax:]

View File

@ -8,6 +8,7 @@
improper_style class2 command :h3
improper_style class2/omp command :h3
improper_style class2/kk command :h3
[Syntax:]

View File

@ -13,6 +13,7 @@ pair_style morse/opt command :h3
pair_style morse/smooth/linear command :h3
pair_style morse/smooth/linear/omp command :h3
pair_style morse/soft command :h3
pair_style morse/kk command :h3
[Syntax:]

View File

@ -54,7 +54,8 @@ reset_timestep 0
variable pxy equal pxy
variable pxx equal pxx-press
fix avstress all ave/time $s $p $d v_pxy v_pxx ave one file einstein.dat
fix avstress all ave/time $s $p $d v_pxy v_pxx ave one &
file profile.einstein.2d
# Diagonal components of SS are larger by factor 2-2/d,
# which is 4/3 for d=3, but 1 for d=2.

View File

@ -40,7 +40,8 @@ thermo 50
thermo_style custom step etotal pe ke temp press &
epair evdwl ecoul elong ebond fnorm fmax vol
compute CSequ all temp/cs cores shells
compute CStemp all temp/cs cores shells
compute thermo_press_lmp all pressure thermo_temp # press for correct kinetic scalar
# output via chunk method
@ -49,16 +50,18 @@ compute CSequ all temp/cs cores shells
#compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0
#fix ave_chunk all ave/time 100 1 100 c_cstherm file chunk.dump mode vector
thermo_modify temp CSequ
thermo_modify temp CStemp press thermo_press_lmp
# velocity bias option
velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CSequ
velocity all scale 1427 temp CSequ
velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CStemp
velocity all scale 1427 temp CStemp
# thermostating using the core/shell decoupling
fix thermoberendsen all temp/berendsen 1427 1427 0.4
fix nve all nve
fix_modify thermoberendsen temp CSequ
fix_modify thermoberendsen temp CStemp
# 2 fmsec timestep

View File

@ -0,0 +1,86 @@
# Testsystem for core-shell model compared to Mitchell and Fincham
# Hendrik Heenen, June 2014
# ------------------------ INITIALIZATION ----------------------------
units metal
dimension 3
boundary p p p
atom_style full
# ----------------------- ATOM DEFINITION ----------------------------
fix csinfo all property/atom i_CSID
read_data data.coreshell fix csinfo NULL CS-Info
group cores type 1 2
group shells type 3 4
neighbor 2.0 bin
comm_modify vel yes
# ------------------------ FORCE FIELDS ------------------------------
kspace_style ewald 1.0e-6
pair_style born/coul/long/cs 20.0 20.0 # A, rho, sigma=0, C, D
pair_coeff * * 0.0 1.000 0.00 0.00 0.00
pair_coeff 3 3 487.0 0.23768 0.00 1.05 0.50 #Na-Na
pair_coeff 3 4 145134.0 0.23768 0.00 6.99 8.70 #Na-Cl
pair_coeff 4 4 405774.0 0.23768 0.00 72.40 145.40 #Cl-Cl
bond_style harmonic
bond_coeff 1 63.014 0.0
bond_coeff 2 25.724 0.0
# ------------------------ Equilibration Run -------------------------------
reset_timestep 0
thermo 50
thermo_style custom step etotal pe ke temp press &
epair evdwl ecoul elong ebond fnorm fmax vol
compute CStemp all temp/cs cores shells
compute thermo_press_lmp all pressure thermo_temp # press for correct kinetic scalar
# output via chunk method
#compute prop all property/atom i_CSID
#compute cs_chunk all chunk/atom c_prop
#compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0
#fix ave_chunk all ave/time 100 1 100 c_cstherm file chunk.dump mode vector
thermo_modify temp CStemp press thermo_press_lmp
# 2 fs timestep (0.002 ps in metal units)
timestep 0.002
# velocity bias option
velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CStemp
velocity all scale 1427 temp CStemp
# thermostating using the core/shell decoupling
fix thermoberendsen all temp/berendsen 1427 1427 0.4
fix nve all nve
fix_modify thermoberendsen temp CStemp
run 500
unfix thermoberendsen
unfix nve
fix npt_equ all npt temp 1427 1427 0.04 iso 0 0 0.4
fix_modify npt_equ temp CStemp press thermo_press_lmp # pressure for correct kinetic scalar
run 500
unfix npt_equ
# ------------------------ Dynamic Run -------------------------------
fix npt_dyn all npt temp 1427 1427 0.04 iso 0 0 0.4
fix_modify npt_dyn temp CStemp press thermo_press_lmp # pressure for correct kinetic scalar
run 1000

View File

@ -0,0 +1,189 @@
LAMMPS (26 Jan 2017)
# Testsystem for core-shell model compared to Mitchel and Finchham
# Hendrik Heenen, June 2014
# ------------------------ INITIALIZATION ----------------------------
units metal
dimension 3
boundary p p p
atom_style full
# ----------------------- ATOM DEFINITION ----------------------------
fix csinfo all property/atom i_CSID
read_data data.coreshell fix csinfo NULL CS-Info
orthogonal box = (0 0 0) to (24.096 24.096 24.096)
1 by 2 by 2 MPI processor grid
reading atoms ...
432 atoms
scanning bonds ...
1 = max bonds/atom
reading bonds ...
216 bonds
1 = max # of 1-2 neighbors
0 = max # of 1-3 neighbors
0 = max # of 1-4 neighbors
1 = max # of special neighbors
group cores type 1 2
216 atoms in group cores
group shells type 3 4
216 atoms in group shells
neighbor 2.0 bin
comm_modify vel yes
# ------------------------ FORCE FIELDS ------------------------------
pair_style born/coul/dsf/cs 0.1 20.0 20.0 # A, rho, sigma=0, C, D
pair_coeff * * 0.0 1.000 0.00 0.00 0.00
pair_coeff 3 3 487.0 0.23768 0.00 1.05 0.50 #Na-Na
pair_coeff 3 4 145134.0 0.23768 0.00 6.99 8.70 #Na-Cl
pair_coeff 4 4 405774.0 0.23768 0.00 72.40 145.40 #Cl-Cl
bond_style harmonic
bond_coeff 1 63.014 0.0
bond_coeff 2 25.724 0.0
# ------------------------ Equilibration Run -------------------------------
reset_timestep 0
thermo 50
thermo_style custom step etotal pe ke temp press epair evdwl ecoul elong ebond fnorm fmax vol
compute CSequ all temp/cs cores shells
# output via chunk method
#compute prop all property/atom i_CSID
#compute cs_chunk all chunk/atom c_prop
#compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0
#fix ave_chunk all ave/time 100 1 100 c_cstherm file chunk.dump mode vector
thermo_modify temp CSequ
# velocity bias option
velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CSequ
Neighbor list info ...
update every 1 steps, delay 10 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 22
ghost atom cutoff = 22
binsize = 11, bins = 3 3 3
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair born/coul/dsf/cs, half, perpetual
pair build: half/bin/newton
stencil: half/bin/3d/newton
bin: standard
velocity all scale 1427 temp CSequ
fix thermoberendsen all temp/berendsen 1427 1427 0.4
fix nve all nve
fix_modify thermoberendsen temp CSequ
# 2 fmsec timestep
timestep 0.002
run 500
Memory usage per processor = 6.8559 Mbytes
Step TotEng PotEng KinEng Temp Press E_pair E_vdwl E_coul E_long E_bond Fnorm Fmax Volume
0 -635.80596 -675.46362 39.657659 1427 -21302.622 -675.46362 1.6320365 -677.09565 0 0 1.5814015e-14 3.2317898e-15 13990.5
50 -634.07021 -666.11867 32.048452 1153.1982 -4560.945 -668.28236 37.756542 -706.0389 0 2.163691 13.802484 3.022372 13990.5
100 -631.97128 -662.02544 30.054164 1081.4378 -3497.564 -664.61825 39.275003 -703.89325 0 2.5928078 13.956833 2.5417699 13990.5
150 -630.14953 -663.04215 32.892622 1183.5739 -88.43828 -665.63444 46.239965 -711.87441 0 2.5922927 14.667898 2.4964255 13990.5
200 -628.52878 -663.9795 35.45072 1275.6219 -1755.9004 -666.73564 41.758052 -708.49369 0 2.7561421 14.230743 3.0924004 13990.5
250 -627.27102 -662.025 34.753978 1250.5511 -1234.0918 -665.13519 43.170874 -708.30606 0 3.1101887 14.221086 1.941354 13990.5
300 -626.5495 -663.74287 37.193368 1338.3275 -2049.3444 -666.45574 40.476148 -706.93188 0 2.7128711 13.330425 1.7756755 13990.5
350 -625.87313 -665.21855 39.345421 1415.7647 -1543.1723 -667.90872 41.577366 -709.48609 0 2.6901682 13.541311 1.854662 13990.5
400 -625.09344 -661.26404 36.1706 1301.5253 -729.96729 -664.10334 43.468765 -707.57211 0 2.8392963 13.663555 1.9067551 13990.5
450 -624.46214 -660.01362 35.551477 1279.2474 -1617.7158 -663.06571 41.644856 -704.71057 0 3.0520921 14.527005 1.7280213 13990.5
500 -623.49246 -659.2527 35.76024 1286.7593 -935.99238 -662.32953 43.038808 -705.36834 0 3.0768302 14.099593 1.9831106 13990.5
Loop time of 4.09864 on 4 procs for 500 steps with 432 atoms
Performance: 21.080 ns/day, 1.139 hours/ns, 121.992 timesteps/s
99.7% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 3.3804 | 3.568 | 3.8354 | 8.9 | 87.05
Bond | 0.00074339 | 0.00079519 | 0.00087976 | 0.0 | 0.02
Neigh | 0.045851 | 0.046084 | 0.046361 | 0.1 | 1.12
Comm | 0.20413 | 0.47123 | 0.65875 | 24.3 | 11.50
Output | 0.00044298 | 0.00046057 | 0.00051165 | 0.0 | 0.01
Modify | 0.0064909 | 0.0067219 | 0.0069766 | 0.2 | 0.16
Other | | 0.005345 | | | 0.13
Nlocal: 108 ave 114 max 105 min
Histogram: 1 1 1 0 0 0 0 0 0 1
Nghost: 6527 ave 6599 max 6472 min
Histogram: 1 0 1 0 1 0 0 0 0 1
Neighs: 74388.2 ave 75855 max 73680 min
Histogram: 1 2 0 0 0 0 0 0 0 1
Total # of neighbors = 297553
Ave neighs/atom = 688.78
Ave special neighs/atom = 1
Neighbor list builds = 20
Dangerous builds = 0
unfix thermoberendsen
# ------------------------ Dynamic Run -------------------------------
run 1000
Memory usage per processor = 6.85787 Mbytes
Step TotEng PotEng KinEng Temp Press E_pair E_vdwl E_coul E_long E_bond Fnorm Fmax Volume
500 -623.49319 -659.2527 35.759511 1286.7331 -936.04802 -662.32953 43.038808 -705.36834 0 3.0768302 14.099593 1.9831106 13990.5
550 -623.44059 -663.57938 40.138795 1444.3127 -935.73484 -666.2789 42.563337 -708.84224 0 2.6995167 13.918509 2.3189805 13990.5
600 -623.4703 -660.01592 36.545618 1315.0196 1327.3492 -663.08845 47.985462 -711.07391 0 3.0725254 15.192713 2.4098428 13990.5
650 -623.46796 -661.56776 38.099807 1370.9439 457.82439 -664.81976 45.495622 -710.31538 0 3.2519966 15.026057 1.8500226 13990.5
700 -623.50158 -659.5131 36.011523 1295.8012 -460.03772 -663.1078 43.938203 -707.046 0 3.5946908 14.660979 2.4825518 13990.5
750 -623.44787 -661.93353 38.485658 1384.8279 97.429626 -664.9551 45.083146 -710.03825 0 3.0215753 15.10043 2.3433897 13990.5
800 -623.48215 -659.50655 36.024402 1296.2647 1097.3866 -662.61124 47.251998 -709.86324 0 3.1046914 14.556382 2.0543766 13990.5
850 -623.45868 -661.13782 37.679134 1355.8068 -1802.1624 -664.41257 40.70845 -705.12102 0 3.2747525 14.691444 2.2054332 13990.5
900 -623.43556 -663.59137 40.155815 1444.9251 534.99197 -666.71877 45.601619 -712.32039 0 3.127395 14.741411 2.5807895 13990.5
950 -623.51318 -661.57916 38.06598 1369.7267 -678.12625 -664.37535 43.207862 -707.58322 0 2.7961988 14.430307 2.3936105 13990.5
1000 -623.47287 -661.22274 37.749874 1358.3523 634.7979 -664.42973 46.373361 -710.80309 0 3.2069879 15.891192 2.4042765 13990.5
1050 -623.48133 -661.52868 38.047347 1369.0562 -583.15228 -664.6098 43.618772 -708.22857 0 3.081116 14.806856 2.3447613 13990.5
1100 -623.47867 -661.83761 38.358946 1380.2685 -868.9779 -664.8826 42.84846 -707.73106 0 3.044983 14.69567 2.399143 13990.5
1150 -623.44713 -661.21299 37.765857 1358.9274 405.14554 -664.09567 45.578739 -709.6744 0 2.8826753 15.437367 3.1381305 13990.5
1200 -623.46549 -660.91706 37.451568 1347.6183 699.78996 -664.0883 46.36297 -710.45127 0 3.1712473 15.109665 1.8891886 13990.5
1250 -623.49296 -658.2218 34.728838 1249.6464 1061.0154 -661.29052 47.668699 -708.95922 0 3.0687228 14.901367 2.3964137 13990.5
1300 -623.49837 -660.91022 37.411844 1346.1889 226.99512 -664.35989 45.352287 -709.71217 0 3.4496704 15.161542 2.2137993 13990.5
1350 -623.46718 -658.80365 35.336469 1271.5108 1039.6469 -662.16908 47.565671 -709.73475 0 3.3654314 15.892516 2.7888426 13990.5
1400 -623.47124 -661.45375 37.982513 1366.7233 -379.56023 -664.6321 43.788306 -708.42041 0 3.1783497 14.251126 1.7415409 13990.5
1450 -623.46671 -660.17518 36.708464 1320.8792 -374.37056 -662.92706 44.083648 -707.01071 0 2.7518803 15.210167 1.9984277 13990.5
1500 -623.50515 -659.06488 35.559725 1279.5442 260.37822 -662.39548 45.779764 -708.17524 0 3.3306005 14.682396 2.4201107 13990.5
Loop time of 8.26746 on 4 procs for 1000 steps with 432 atoms
Performance: 20.901 ns/day, 1.148 hours/ns, 120.956 timesteps/s
99.7% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 6.706 | 7.1568 | 7.6597 | 12.7 | 86.57
Bond | 0.0014617 | 0.0015531 | 0.0016506 | 0.2 | 0.02
Neigh | 0.10511 | 0.10522 | 0.10532 | 0.0 | 1.27
Comm | 0.48547 | 0.98841 | 1.4393 | 34.0 | 11.96
Output | 0.0012085 | 0.0012462 | 0.0013196 | 0.1 | 0.02
Modify | 0.0021446 | 0.0021989 | 0.0022545 | 0.1 | 0.03
Other | | 0.01204 | | | 0.15
Nlocal: 108 ave 114 max 94 min
Histogram: 1 0 0 0 0 0 0 0 1 2
Nghost: 6512.25 ave 6586 max 6456 min
Histogram: 1 0 0 2 0 0 0 0 0 1
Neighs: 74248.2 ave 77441 max 65858 min
Histogram: 1 0 0 0 0 0 0 0 0 3
Total # of neighbors = 296993
Ave neighs/atom = 687.484
Ave special neighs/atom = 1
Neighbor list builds = 46
Dangerous builds = 0
Total wall time: 0:00:12

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,17 @@
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017, W-He and He-He from Juslin, N. and Wirth, B. D. Journal of Nuclear Materials, 432, (2013) p61-66
#
# Definition of SNAP+ZBL+Tabulated potential.
variable zblcutinner equal 4
variable zblcutouter equal 4.8
variable zblz equal 74
# Specify hybrid with SNAP, ZBL, and tabulated He-He and W-He pair potentials
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} snap table spline 10000 table spline 10000
pair_coeff 1 1 zbl ${zblz} ${zblz}
pair_coeff * * snap W_2940_2017_2.snapcoeff W W_2940_2017_2.snapparam W NULL
pair_coeff 2 2 table 1 He_He_JW2013.table HeHe
pair_coeff 1 2 table 2 W_He_JW2013.table WHe
#Hybrid/overlay takes all pair styles and adds their contributions equally, so the order of the pair_coeff commands doesn't matter here.
#This is not the case for pair_style hybrid, where only one pair_coeff is read for each type combination and the order does matter.

View File

@ -0,0 +1,16 @@
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017
#
# Definition of SNAP+ZBL potential.
variable zblcutinner equal 4
variable zblcutouter equal 4.8
variable zblz equal 74
# Specify hybrid with SNAP and ZBL
pair_style hybrid/overlay &
zbl ${zblcutinner} ${zblcutouter} &
snap
pair_coeff 1 1 zbl ${zblz} ${zblz}
pair_coeff * * snap W_2940_2017_2.snapcoeff W W_2940_2017_2.snapparam W
#Nomenclature of the snap files is Element_DakotaID_Year_Month

View File

@ -0,0 +1,62 @@
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017
#
# LAMMPS SNAP coefficients for W
1 56
W 0.5 1
0.781170857801
-0.001794941735
-0.016628679036
-0.066625537037
-0.073716343967
-0.062913923923
0.032552694672
-0.134901744419
-0.075076334103
-0.148558616547
-0.140808831101
-0.166749145704
-0.047487675984
-0.049892090603
-0.032483739965
-0.114766534860
-0.106759718242
-0.125894850485
-0.103409735225
-0.095247335447
-0.061998736346
-0.053895610976
-0.010799734206
-0.011644828900
-0.028316826924
0.011176085541
0.064619474684
-0.023886279996
-0.004099224312
-0.056084222496
-0.035551497650
-0.056678501024
-0.004905851656
-0.015701146162
-0.008462280779
0.016429018676
0.032432633993
-0.010805361272
-0.014841893457
0.019414134562
-0.008112452759
-0.002700775447
0.007032887063
-0.009706065042
0.008385967833
0.028606085876
-0.007003591067
0.006467260152
-0.006666986361
0.029243285316
0.002477673872
-0.000199497504
0.004068954075
0.006036129972
-0.013010633924
-0.008314173699

View File

@ -0,0 +1,12 @@
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017
#
# required
rcutfac 4.73442
twojmax 8
# optional
gamma 1
rfac0 0.99363
rmin0 0
diagonalstyle 3

View File

@ -0,0 +1,333 @@
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Juslin, N. and Wirth, B. D. Journal of Nuclear Materials, 432, (2013) p61-66
#
#-> LAMMPS Potential File in tabular format. <-#
# N. Juslin and B.D. Wirth, J. Nucl. Mater. 432: 61-66 (2013). #
# Implemented by K.D. Hammond (2013) karlh@utk.edu
WHe
N 325
1 0.000001 193967.941432641 21165142.7035409
2 0.02 88781.7058271842 5199760.86773195
3 0.03 54461.7226844158 2255783.27652381
4 0.04 37754.4525147374 1233285.07338825
5 0.05 28027.0551422937 765154.791685769
6 0.06 21746.7711436002 514318.047209743
7 0.07 17407.2122555268 365457.111099578
8 0.08 14260.380890756 270524.172812057
9 0.09 11894.315375388 206649.630589727
10 0.1 10064.3593879466 161847.200920021
11 0.11 8616.61401638085 129358.142417815
12 0.12 7449.7679909506 105145.917230655
13 0.13 6494.61875555217 86685.1834957916
14 0.14 5702.42313976808 72334.2424211515
15 0.15 5037.94580316996 60990.8171175998
16 0.16 4475.14199778904 51894.0939422829
17 0.17 3994.38401078788 44506.229619365
18 0.18 3580.62666819885 38438.9489608325
19 0.19 3222.16279673461 33406.7013437966
20 0.2 2909.75986785247 29195.976537891
21 0.21 2636.04908399207 25644.7364933058
22 0.22 2395.08534986163 22628.3414553063
23 0.23 2182.02521022247 20049.740107058
24 0.24 1992.88767473625 17832.5164484929
25 0.25 1824.374223221 15915.8858113167
26 0.26 1673.73168801722 14251.0430039002
27 0.27 1538.64662154286 12798.4627755747
28 0.28 1417.16307190682 11525.8804272463
29 0.29 1307.61796220838 10406.7644777657
30 0.3 1208.58985019291 9419.14958758174
31 0.31 1118.8579596074 8544.73619272459
32 0.32 1037.36917029033 7768.18965475981
33 0.33 963.211228625566 7076.59012069084
34 0.34 895.590859433663 6458.9972713357
35 0.35 833.815769676712 5906.10340798657
36 0.36 777.27976462076 5409.9550164674
37 0.37 725.450370074705 4963.72782166695
38 0.38 677.858485369477 4561.54392990095
39 0.39 634.089691821654 4198.3223154716
40 0.4 593.776918443069 3869.65589682796
41 0.41 556.594226358163 3571.70994736531
42 0.42 522.251519987548 3301.13772506169
43 0.43 490.490029664824 3055.01007668961
44 0.44 461.07843929567 2830.75644369566
45 0.45 433.809555685745 2626.11521737135
46 0.46 408.497434574275 2439.09179705296
47 0.47 384.974893215623 2267.92302383156
48 0.48 363.091351319355 2111.04691385653
49 0.49 342.710951882019 1967.07681501168
50 0.5 323.710921379843 1834.77927007332
51 0.51 305.980135298364 1713.05499722693
52 0.52 289.417860333203 1600.9225017701
53 0.53 273.932649026687 1497.50391617084
54 0.54 259.44136628296 1402.0127334168
55 0.55 245.86833026895 1313.74315392891
56 0.56 233.144552771494 1232.06081168718
57 0.57 221.207066231741 1156.3946825672
58 0.58 209.998326488761 1086.23000874818
59 0.59 199.465681793803 1021.10209864685
60 0.6 189.560899952328 960.590883129588
61 0.61 180.239746551915 904.316126537919
62 0.62 171.461608172096 851.933205957983
63 0.63 163.189155273495 803.129384679904
64 0.64 155.388040149985 757.62051633995
65 0.65 148.026625916815 715.148125151611
66 0.66 141.075743014823 675.476815185188
67 0.67 134.508470148273 638.391968073426
68 0.68 128.299936952045 603.697693987706
69 0.69 122.427146011509 571.215005397664
70 0.7 116.868812142757 540.780187122909
71 0.71 111.605217088269 512.243339613345
72 0.72 106.618077998551 485.467075341772
73 0.73 101.890428258492 460.32535073159
74 0.74 97.4065093817089 436.702418234594
75 0.75 93.1516728402801 414.491885070093
76 0.76 89.1122908237631 393.595866780115
77 0.77 85.2756750325343 373.92422518237
78 0.78 81.6300027083206 355.393881543952
79 0.79 78.1642491910339 337.928196880203
80 0.8 74.8681263671512 321.456412227117
81 0.81 71.7320264421784 305.913142560798
82 0.82 68.7469705293107 291.237918759986
83 0.83 65.9045615992059 277.374772641144
84 0.84 63.1969413826496 264.271860652001
85 0.85 60.6167508595336 251.881122298665
86 0.86 58.1570940046202 240.157969812335
87 0.87 55.8115044935616 229.061005941579
88 0.88 53.5739151020755 218.551767091662
89 0.89 51.4386295574478 208.594489329042
90 0.9 49.400296625012 199.15589503172
91 0.91 47.4538862332645 190.204998198848
92 0.92 45.5946674600827 181.712926639528
93 0.93 43.818188219384 173.652759444123
94 0.94 42.1202565026948 165.999378304597
95 0.95 40.4969230436942 158.729331395598
96 0.96 38.944465286023 151.820708657476
97 0.97 37.4593725456438 145.253027437895
98 0.98 36.03833226895 139.00712755184
99 0.99 34.6782172967515 133.065074912029
100 1 33.3760740523261 127.410072964226
101 1.01 32.1291115790037 122.026381235854
102 1.02 30.9346913593292 116.899240372539
103 1.03 29.7903178538011 112.014803096661
104 1.04 28.6936297025718 107.360070575369
105 1.05 27.6423915383737 102.922833733498
106 1.06 26.6344863633652 98.69161908997
107 1.07 25.6679084466039 94.6556387351718
108 1.08 24.7407567025028 90.804744101767
109 1.09 23.8272933774771 87.0424919424945
110 1.1 22.9746938084833 83.501083250349
111 1.11 22.1568053467682 80.0995862713648
112 1.12 21.372249301539 76.8339257499533
113 1.13 20.6196874341871 73.7000865054019
114 1.14 19.8978213575361 70.6941134318681
115 1.15 19.2053919350916 67.8121114983956
116 1.16 18.5411786802966 65.0502457488983
117 1.17 17.9039991557794 62.4047413021631
118 1.18 17.2927083726064 59.8718833518619
119 1.19 16.7061981895337 57.4480171665332
120 1.2 16.1433967122567 55.1295480895992
121 1.21 15.6032676926612 52.9129415393513
122 1.22 15.0848099280771 50.7947230089626
123 1.23 14.5870566605305 48.7714780664787
124 1.24 14.1090749759875 46.8398523548244
125 1.25 13.6499652036138 44.9965515917961
126 1.26 13.2088603150229 43.2383415700692
127 1.27 12.784925323525 41.5620481571964
128 1.28 12.3773566833817 39.9645572956038
129 1.29 11.9853816890583 38.4428150025965
130 1.3 11.60825787447 36.9938273703497
131 1.31 11.2452724122346 35.6146605659201
132 1.32 10.8957415129272 34.3024408312399
133 1.33 10.5590098243284 33.0543544831148
134 1.34 10.234449830679 31.8676479132323
135 1.35 9.92146125192414 30.739627588146
136 1.36 9.61947044297006 29.6676600492951
137 1.37 9.32792979293674 28.6491719129885
138 1.38 9.04631712440562 27.6816498704143
139 1.39 8.77413509267092 26.7626406876384
140 1.4 8.51091058499253 25.8897512055996
141 1.41 8.25619411984519 25.0606483401084
142 1.42 8.00955924617449 24.2730590818669
143 1.43 7.77060194264106 23.524770496429
144 1.44 7.53894001687615 22.8136297242495
145 1.45 7.31421250473534 22.1375439806457
146 1.46 7.09607906954318 21.4944805558102
147 1.47 6.88421940134788 20.8824668148159
148 1.48 6.678332616175 20.2995901976151
149 1.49 6.47813665527576 19.743998219024
150 1.5 6.28336768437487 19.2138984687485
151 1.51 6.09377949293196 18.7075586113629
152 1.52 5.90914289338127 18.2233063863173
153 1.53 5.72924512039265 17.759529607943
154 1.54 5.55388923011117 17.3146761654411
155 1.55 5.38289349942409 16.8872540228954
156 1.56 5.21609082520001 16.4758312192575
157 1.57 5.05332812353902 16.0790358683639
158 1.58 4.89446572903546 15.6955561589207
159 1.59 4.73937679401809 15.3241403545121
160 1.6 4.58794668780786 14.9635967935988
161 1.61 4.44007239596442 14.6127938895147
162 1.62 4.29566191954302 14.2706601304803
163 1.63 4.15463367433779 13.9361840795741
164 1.64 4.01691589014092 13.6084143747644
165 1.65 3.88244600998598 13.2864597288958
166 1.66 3.75117008941481 12.9694889296766
167 1.67 3.62304219570535 12.6567308397093
168 1.68 3.49802380713948 12.3474743964548
169 1.69 3.37608321225264 12.04106861226
170 1.7 3.25719490908079 11.7369225743464
171 1.71 3.14133900441368 11.4345054448136
172 1.72 3.02850061304264 11.1333464606264
173 1.73 2.91866925702027 10.8330349336425
174 1.74 2.81183826489826 10.533220250582
175 1.75 2.70800417099883 10.233611873045
176 1.76 2.60716611463897 9.93397933750975
177 1.77 2.50932523941003 9.63415225533254
178 1.78 2.41448409240832 9.3340203127359
179 1.79 2.3226460234938 9.03353327083141
180 1.8 2.23381458454253 8.732700965596
181 1.81 2.14799292869986 8.43159330788831
182 1.82 2.06518320961743 8.13034028344418
183 1.83 1.98538598072548 7.82913195286937
184 1.84 1.90859959447164 7.52821845164772
185 1.85 1.83481960157053 7.2279099901466
186 1.86 1.7640381502589 6.92857685359786
187 1.87 1.69624338555161 6.63064940211416
188 1.88 1.63141884849222 6.33461807069079
189 1.89 1.56954287538349 6.04103336918388
190 1.9 1.51058799706618 5.75050588234672
191 1.91 1.45452033816503 5.46370626978796
192 1.92 1.40129901631838 5.18136526601165
193 1.93 1.35087554145878 4.90427368037354
194 1.94 1.30319321504351 4.63328239712837
195 1.95 1.25818652931969 4.36930237539354
196 1.96 1.21578056655756 4.11330464916909
197 1.97 1.17589039832831 3.86632032733041
198 1.98 1.13842048472736 3.62944059362462
199 1.99 1.10326407363948 3.4038167066833
200 2 1.07030260000113 3.19065999999702
201 2.01 1.03940508502285 2.99124188195356
202 2.02 1.01042753546074 2.80689383580147
203 2.03 0.98321234287971 2.63900741967382
204 2.04 0.957587682867711 2.48903426657353
205 2.05 0.93336691431989 2.35848608439528
206 2.06 0.910347978676441 2.24893465587638
207 2.07 0.888312799175878 2.16201183866769
208 2.08 0.867026680100139 2.09940956527362
209 2.09 0.846311972535292 2.0501194750481
210 2.1 0.826052090711088 2.00205155230487
211 2.11 0.806267097861257 1.95513674544071
212 2.12 0.786945609506685 1.90934585391834
213 2.13 0.7680765291529 1.86465047725327
214 2.14 0.749649040410966 1.82102299086693
215 2.15 0.731652599355765 1.77843652275056
216 2.16 0.714076927113717 1.73686493090964
217 2.17 0.696912002672274 1.69628278155967
218 2.18 0.680148055903811 1.65666532804561
219 2.19 0.663775560796824 1.61798849045792
220 2.2 0.647785228887586 1.5802288359199
221 2.21 0.632168002885678 1.54336355952145
222 2.22 0.616915050487049 1.50737046587586
223 2.23 0.602017758368489 1.47222795127685
224 2.24 0.587467726357617 1.43791498643425
225 2.25 0.573256761772716 1.40441109976729
226 2.26 0.559376873926913 1.37169636123572
227 2.27 0.54582026879146 1.33975136668933
228 2.28 0.532579343812991 1.30855722271748
229 2.29 0.519646682879861 1.27809553198093
230 2.3 0.507015051432831 1.24834837900889
231 2.31 0.494677391715517 1.21929831644497
232 2.32 0.482626818160208 1.19092835172618
233 2.33 0.470856612904789 1.16322193418007
234 2.34 0.459360221436662 1.1361629425252
235 2.35 0.448131248359705 1.10973567276124
236 2.36 0.437163453280438 1.08392482643501
237 2.37 0.426450746809703 1.05871549926971
238 2.38 0.415987186676291 1.0340931701448
239 2.39 0.405766973949056 1.01004369041464
240 2.4 0.395784449364215 0.986553273554313
241 2.41 0.38603408975458 0.963608485121541
242 2.42 0.376510504577652 0.941196233024116
243 2.43 0.367208432539548 0.919303758082476
244 2.44 0.358122738311864 0.897918624877599
245 2.45 0.349248409338683 0.877028712874665
246 2.46 0.340580552730998 0.85662220781334
247 2.47 0.332114392245936 0.83668759335581
248 2.48 0.323845265348255 0.8172136429841
249 2.49 0.315768620351645 0.798189412138432
250 2.5 0.307880013637488 0.779604230588763
251 2.51 0.300175106948751 0.761447695031842
252 2.52 0.292649664756829 0.743709661906479
253 2.53 0.285299551699157 0.726380240419923
254 2.54 0.278120730085547 0.70944978577854
255 2.55 0.271109257471214 0.69290889261619
256 2.56 0.264149883217677 0.704309455966822
257 2.57 0.257023670963235 0.720548414483017
258 2.58 0.249746544950963 0.73449858053557
259 2.59 0.242341191411443 0.746201328856265
260 2.6 0.234829852474346 0.755704032488528
261 2.61 0.227234268357011 0.763059628544665
262 2.62 0.219575623890343 0.768326185205915
263 2.63 0.211874499363033 0.771566471270733
264 2.64 0.204150825652343 0.772847529495918
265 2.65 0.196423843597567 0.772240254912945
266 2.66 0.188712067560753 0.769818979238484
267 2.67 0.181033253108439 0.765661062433794
268 2.68 0.17340436873789 0.759846492402718
269 2.69 0.165841571561793 0.752457493752577
270 2.7 0.158360186856416 0.743578146476553
271 2.71 0.150974691370005 0.733294015350452
272 2.72 0.14369870028059 0.721691790771177
273 2.73 0.136544957685405 0.708858941699004
274 2.74 0.129525330497857 0.69488338130108
275 2.75 0.122650805622294 0.679853145829612
276 2.76 0.115931490271846 0.663856087205069
277 2.77 0.10937661529017 0.646979579712699
278 2.78 0.102994541334199 0.629310241159726
279 2.79 0.0967927677717845 0.610933668781077
280 2.8 0.0907779441455664 0.591934190123306
281 2.81 0.0849558840523623 0.572394629079895
282 2.82 0.0793315812859323 0.552396087196204
283 2.83 0.0739092280900552 0.532017740309284
284 2.84 0.0686922353684514 0.51133665053656
285 2.85 0.0636832546982004 0.490427593578163
286 2.86 0.058884201993894 0.469362901250494
287 2.87 0.0542962826708072 0.448212319123508
288 2.88 0.0499200181568738 0.427042879091302
289 2.89 0.04575527360515 0.405918786664859
290 2.9 0.0418012866607671 0.384901322737369
291 2.91 0.0380566971390415 0.364048759536354
292 2.92 0.0345195774744505 0.343416290443013
293 2.93 0.0311874638035265 0.323055973327595
294 2.94 0.0280573875483925 0.303016687020528
295 2.95 0.0251259073715924 0.283344100512051
296 2.96 0.0223891413770724 0.264080654448684
297 2.97 0.0198427994365954 0.245265554472591
298 2.98 0.017482215525515 0.226934775930029
299 2.99 0.0153023799566589 0.20912107945741
300 3 0.0132979714060716 0.191854036938167
301 3.01 0.0114633886294948 0.175160067310366
302 3.02 0.009792781773727 0.159062481694054
303 3.03 0.00828008319235769 0.143581537298372
304 3.04 0.00691903768080766 0.128734499561665
305 3.05 0.00570323205109961 0.114535711972949
306 3.06 0.0046261239723178 0.100996673020243
307 3.07 0.00368107000826405 0.088126119710232
308 3.08 0.00286135278937321 0.0759301171045549
309 3.09 0.00216020726148435 0.0644121533205429
310 3.1 0.00157084595956787 0.0535732394484653
311 3.11 0.00108648325995938 0.0434120138431347
312 3.12 0.000700358570037423 0.0339248502550448
313 3.13 0.000405758419588332 0.0251059692749879
314 3.14 0.000196037423311657 0.0169475525761798
315 3.15 6.46380890244662E-05 0.00943985944932857
316 3.16 5.1094511057313E-06 0.00257134513861945
317 3.17 0 0
318 3.18 0 0
319 3.19 0 0
320 3.2 0 0
321 3.21 0 0
322 3.22 0 0
323 3.23 0 0
324 3.24 0 0
325 3.25 0 0

View File

@ -0,0 +1,45 @@
# Demonstrate SNAP W potential
# Initialize simulation
variable nsteps index 100
variable nrep equal 4
variable a equal 3.1803
units metal
# generate the box and atom positions using a BCC lattice
variable nx equal ${nrep}
variable ny equal ${nrep}
variable nz equal ${nrep}
boundary p p p
lattice bcc $a
region box block 0 ${nx} 0 ${ny} 0 ${nz}
create_box 1 box
create_atoms 1 box
mass 1 183.84
# choose potential
include W_2940_2017_2.pot.snap
# Setup output
thermo 10
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Run MD
velocity all create 300.0 4928459
fix 1 all nve
run ${nsteps}

View File

@ -0,0 +1,48 @@
# Demonstrate SNAP W potential with tabulated W-He and He-He pair interactions
# Initialize simulation
variable nsteps index 100
variable nrep equal 4
variable a equal 3.1803
units metal
# generate the box and atom positions using a BCC lattice
variable nx equal ${nrep}
variable ny equal ${nrep}
variable nz equal ${nrep}
boundary p p p
lattice bcc $a
region box block 0 ${nx} 0 ${ny} 0 ${nz}
create_box 2 box
create_atoms 1 box
mass 1 183.84
mass 2 4.0026
set group all type/fraction 2 0.05 3590153 # Change 5% of W to He
group tungsten type 1
group helium type 2
# choose potential
include W.SNAP_HePair.pot
# Setup output
thermo 10
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Run MD
velocity all create 300.0 4928459
fix 1 all nve
run ${nsteps}

View File

@ -0,0 +1,144 @@
LAMMPS (13 Feb 2017)
# Demonstrate SNAP Ta potential
# Initialize simulation
variable nsteps index 100
variable nrep equal 4
variable a equal 3.1803
units metal
# generate the box and atom positions using a BCC lattice
variable nx equal ${nrep}
variable nx equal 4
variable ny equal ${nrep}
variable ny equal 4
variable nz equal ${nrep}
variable nz equal 4
boundary p p p
lattice bcc $a
lattice bcc 3.1803
Lattice spacing in x,y,z = 3.1803 3.1803 3.1803
region box block 0 ${nx} 0 ${ny} 0 ${nz}
region box block 0 4 0 ${ny} 0 ${nz}
region box block 0 4 0 4 0 ${nz}
region box block 0 4 0 4 0 4
create_box 1 box
Created orthogonal box = (0 0 0) to (12.7212 12.7212 12.7212)
1 by 1 by 1 MPI processor grid
create_atoms 1 box
Created 128 atoms
mass 1 183.84
# choose potential
include W_2940_2017_2.pot.snap
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017
#
# Definition of SNAP+ZBL potential.
variable zblcutinner equal 4
variable zblcutouter equal 4.8
variable zblz equal 74
# Specify hybrid with SNAP, ZBL, and long-range Coulomb
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} snap
pair_style hybrid/overlay zbl 4 ${zblcutouter} snap
pair_style hybrid/overlay zbl 4 4.8 snap
pair_coeff 1 1 zbl ${zblz} ${zblz}
pair_coeff 1 1 zbl 74 ${zblz}
pair_coeff 1 1 zbl 74 74
pair_coeff * * snap W_2940_2017_2.snapcoeff W W_2940_2017_2.snapparam W
Reading potential file W_2940_2017_2.snapcoeff with DATE: 2017-02-20
SNAP Element = W, Radius 0.5, Weight 1
Reading potential file W_2940_2017_2.snapparam with DATE: 2017-02-20
SNAP keyword rcutfac 4.73442
SNAP keyword twojmax 8
SNAP keyword gamma 1
SNAP keyword rfac0 0.99363
SNAP keyword rmin0 0
SNAP keyword diagonalstyle 3
#Nomenclature on the snap files are Element_DakotaID_Year_Month
# Setup output
thermo 10
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Run MD
velocity all create 300.0 4928459
fix 1 all nve
run ${nsteps}
run 100
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 5.8
ghost atom cutoff = 5.8
binsize = 2.9, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair zbl, perpetual, half/full from (2)
attributes: half, newton on
pair build: halffull/newton
stencil: none
bin: none
(2) pair snap, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
Memory usage per processor = 5.14696 Mbytes
Step Temp E_pair E_mol TotEng Press
0 300 -11.028325 0 -10.98985 3010.497
10 293.40666 -11.027479 0 -10.989849 3246.0559
20 274.27375 -11.025025 0 -10.989849 3927.9497
30 244.50457 -11.021207 0 -10.989849 4983.5484
40 207.0784 -11.016407 0 -10.989849 6299.9473
50 165.74442 -11.011105 0 -10.989848 7736.5123
60 124.62181 -11.005831 0 -10.989848 9140.8587
70 87.744792 -11.001101 0 -10.989848 10366.489
80 58.605244 -10.997364 0 -10.989848 11289.914
90 39.754503 -10.994946 0 -10.989848 11824.945
100 32.524085 -10.994019 0 -10.989848 11932.118
Loop time of 11.8271 on 1 procs for 100 steps with 128 atoms
Performance: 0.365 ns/day, 65.706 hours/ns, 8.455 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 11.826 | 11.826 | 11.826 | 0.0 | 99.99
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.00044084 | 0.00044084 | 0.00044084 | 0.0 | 0.00
Output | 0.00013232 | 0.00013232 | 0.00013232 | 0.0 | 0.00
Modify | 0.00021887 | 0.00021887 | 0.00021887 | 0.0 | 0.00
Other | | 0.0002718 | | | 0.00
Nlocal: 128 ave 128 max 128 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 727 ave 727 max 727 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 3712 ave 3712 max 3712 min
Histogram: 1 0 0 0 0 0 0 0 0 0
FullNghs: 7424 ave 7424 max 7424 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 7424
Ave neighs/atom = 58
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:11

View File

@ -0,0 +1,144 @@
LAMMPS (13 Feb 2017)
# Demonstrate SNAP Ta potential
# Initialize simulation
variable nsteps index 100
variable nrep equal 4
variable a equal 3.1803
units metal
# generate the box and atom positions using a BCC lattice
variable nx equal ${nrep}
variable nx equal 4
variable ny equal ${nrep}
variable ny equal 4
variable nz equal ${nrep}
variable nz equal 4
boundary p p p
lattice bcc $a
lattice bcc 3.1803
Lattice spacing in x,y,z = 3.1803 3.1803 3.1803
region box block 0 ${nx} 0 ${ny} 0 ${nz}
region box block 0 4 0 ${ny} 0 ${nz}
region box block 0 4 0 4 0 ${nz}
region box block 0 4 0 4 0 4
create_box 1 box
Created orthogonal box = (0 0 0) to (12.7212 12.7212 12.7212)
1 by 2 by 2 MPI processor grid
create_atoms 1 box
Created 128 atoms
mass 1 183.84
# choose potential
include W_2940_2017_2.pot.snap
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017
#
# Definition of SNAP+ZBL potential.
variable zblcutinner equal 4
variable zblcutouter equal 4.8
variable zblz equal 74
# Specify hybrid with SNAP, ZBL, and long-range Coulomb
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} snap
pair_style hybrid/overlay zbl 4 ${zblcutouter} snap
pair_style hybrid/overlay zbl 4 4.8 snap
pair_coeff 1 1 zbl ${zblz} ${zblz}
pair_coeff 1 1 zbl 74 ${zblz}
pair_coeff 1 1 zbl 74 74
pair_coeff * * snap W_2940_2017_2.snapcoeff W W_2940_2017_2.snapparam W
Reading potential file W_2940_2017_2.snapcoeff with DATE: 2017-02-20
SNAP Element = W, Radius 0.5, Weight 1
Reading potential file W_2940_2017_2.snapparam with DATE: 2017-02-20
SNAP keyword rcutfac 4.73442
SNAP keyword twojmax 8
SNAP keyword gamma 1
SNAP keyword rfac0 0.99363
SNAP keyword rmin0 0
SNAP keyword diagonalstyle 3
#Nomenclature on the snap files are Element_DakotaID_Year_Month
# Setup output
thermo 10
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Run MD
velocity all create 300.0 4928459
fix 1 all nve
run ${nsteps}
run 100
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 5.8
ghost atom cutoff = 5.8
binsize = 2.9, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair zbl, perpetual, half/full from (2)
attributes: half, newton on
pair build: halffull/newton
stencil: none
bin: none
(2) pair snap, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
Memory usage per processor = 5.12833 Mbytes
Step Temp E_pair E_mol TotEng Press
0 300 -11.028325 0 -10.98985 3010.497
10 293.22504 -11.027456 0 -10.989849 3258.275
20 273.60084 -11.024939 0 -10.989849 3973.9038
30 243.15327 -11.021034 0 -10.989849 5077.9172
40 205.01905 -11.016142 0 -10.989849 6448.4941
50 163.10914 -11.010767 0 -10.989848 7935.6835
60 121.67854 -11.005453 0 -10.989848 9378.9959
70 84.846972 -11.000729 0 -10.989848 10626.301
80 56.127265 -10.997046 0 -10.989848 11551.687
90 38.025013 -10.994724 0 -10.989847 12069.936
100 31.768127 -10.993922 0 -10.989847 12145.648
Loop time of 3.03545 on 4 procs for 100 steps with 128 atoms
Performance: 1.423 ns/day, 16.864 hours/ns, 32.944 timesteps/s
99.9% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 2.9594 | 2.9866 | 3.0319 | 1.6 | 98.39
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.0024238 | 0.047825 | 0.075032 | 12.5 | 1.58
Output | 0.00021601 | 0.00024045 | 0.00027442 | 0.0 | 0.01
Modify | 9.6798e-05 | 0.00011188 | 0.00011802 | 0.0 | 0.00
Other | | 0.000698 | | | 0.02
Nlocal: 32 ave 32 max 32 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Nghost: 431 ave 431 max 431 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Neighs: 928 ave 928 max 928 min
Histogram: 4 0 0 0 0 0 0 0 0 0
FullNghs: 1856 ave 1856 max 1856 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Total # of neighbors = 7424
Ave neighs/atom = 58
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:03

View File

@ -0,0 +1,179 @@
LAMMPS (13 Feb 2017)
# Demonstrate SNAP Ta potential
# Initialize simulation
variable nsteps index 100
variable nrep equal 4
variable a equal 3.1803
units metal
# generate the box and atom positions using a BCC lattice
variable nx equal ${nrep}
variable nx equal 4
variable ny equal ${nrep}
variable ny equal 4
variable nz equal ${nrep}
variable nz equal 4
boundary p p p
lattice bcc $a
lattice bcc 3.1803
Lattice spacing in x,y,z = 3.1803 3.1803 3.1803
region box block 0 ${nx} 0 ${ny} 0 ${nz}
region box block 0 4 0 ${ny} 0 ${nz}
region box block 0 4 0 4 0 ${nz}
region box block 0 4 0 4 0 4
create_box 2 box
Created orthogonal box = (0 0 0) to (12.7212 12.7212 12.7212)
1 by 1 by 1 MPI processor grid
create_atoms 1 box
Created 128 atoms
mass 1 183.84
mass 2 4.0026
set group all type/fraction 2 0.05 3590153 # Change 5% of W to He
5 settings made for type/fraction
group tungsten type 1
123 atoms in group tungsten
group helium type 2
5 atoms in group helium
# choose potential
include W.SNAP_HePair.pot
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017, W-He and He-He from Juslin, N. and Wirth, B. D. Journal of Nuclear Materials, 423, (2013) p61-63
#
# Definition of SNAP+ZBL+Tabulated potential.
variable zblcutinner equal 4
variable zblcutouter equal 4.8
variable zblz equal 74
# Specify hybrid with SNAP, ZBL, and long-range Coulomb
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} snap table spline 10000 table spline 10000
pair_style hybrid/overlay zbl 4 ${zblcutouter} snap table spline 10000 table spline 10000
pair_style hybrid/overlay zbl 4 4.8 snap table spline 10000 table spline 10000
pair_coeff 1 1 zbl ${zblz} ${zblz}
pair_coeff 1 1 zbl 74 ${zblz}
pair_coeff 1 1 zbl 74 74
pair_coeff * * snap W_2940_2017_2.snapcoeff W W_2940_2017_2.snapparam W NULL
Reading potential file W_2940_2017_2.snapcoeff with DATE: 2017-02-20
SNAP Element = W, Radius 0.5, Weight 1
Reading potential file W_2940_2017_2.snapparam with DATE: 2017-02-20
SNAP keyword rcutfac 4.73442
SNAP keyword twojmax 8
SNAP keyword gamma 1
SNAP keyword rfac0 0.99363
SNAP keyword rmin0 0
SNAP keyword diagonalstyle 3
pair_coeff 2 2 table 1 He_He_JW2013.table HeHe
Reading potential file He_He_JW2013.table with DATE: 2017-02-20
WARNING: 1 of 4999 force values in table are inconsistent with -dE/dr.
Should only be flagged at inflection points (../pair_table.cpp:476)
pair_coeff 1 2 table 2 W_He_JW2013.table WHe
Reading potential file W_He_JW2013.table with DATE: 2017-02-20
WARNING: 3 of 325 force values in table are inconsistent with -dE/dr.
Should only be flagged at inflection points (../pair_table.cpp:476)
#Hybrid/overlay will take all pair styles and add their contributions equally, order of pair_coeff doesnt matter here
#This is not the case for pair_style hybrid ... where only one pair_coeff is read for each type combination, order matters here.
# Setup output
thermo 10
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Run MD
velocity all create 300.0 4928459
fix 1 all nve
run ${nsteps}
run 100
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 5.8
ghost atom cutoff = 5.8
binsize = 2.9, bins = 5 5 5
6 neighbor lists, perpetual/occasional/extra = 6 0 0
(1) pair zbl, perpetual, half/full from (2)
attributes: half, newton on
pair build: halffull/newton/skip
stencil: none
bin: none
(2) pair snap, perpetual, skip from (6)
attributes: full, newton on
pair build: skip
stencil: none
bin: none
(3) pair table, perpetual, skip from (5)
attributes: half, newton on
pair build: skip
stencil: none
bin: none
(4) pair table, perpetual, skip from (5)
attributes: half, newton on
pair build: skip
stencil: none
bin: none
(5) neighbor class addition, perpetual, half/full from (6)
attributes: half, newton on
pair build: halffull/newton
stencil: none
bin: none
(6) neighbor class addition, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
Memory usage per processor = 7.6729 Mbytes
Step Temp E_pair E_mol TotEng Press
0 300 -10.438105 0 -10.39963 -5445.2808
10 290.48923 -10.436885 0 -10.399629 -5646.4813
20 271.18868 -10.434409 0 -10.399629 -5654.4646
30 246.2601 -10.431212 0 -10.399629 -5281.8873
40 218.69918 -10.427677 0 -10.399629 -4343.3636
50 189.12519 -10.423885 0 -10.399629 -2903.1138
60 155.55701 -10.419579 0 -10.399629 -1402.2278
70 118.83581 -10.414869 0 -10.399629 -146.36141
80 85.903126 -10.410645 0 -10.399628 857.74986
90 65.223651 -10.407993 0 -10.399628 1494.2746
100 59.833542 -10.407302 0 -10.399628 1938.9164
Loop time of 11.0736 on 1 procs for 100 steps with 128 atoms
Performance: 0.390 ns/day, 61.520 hours/ns, 9.030 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 11.072 | 11.072 | 11.072 | 0.0 | 99.99
Neigh | 0.00041604 | 0.00041604 | 0.00041604 | 0.0 | 0.00
Comm | 0.00046253 | 0.00046253 | 0.00046253 | 0.0 | 0.00
Output | 0.0001657 | 0.0001657 | 0.0001657 | 0.0 | 0.00
Modify | 0.0002265 | 0.0002265 | 0.0002265 | 0.0 | 0.00
Other | | 0.0003119 | | | 0.00
Nlocal: 128 ave 128 max 128 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 727 ave 727 max 727 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 3425 ave 3425 max 3425 min
Histogram: 1 0 0 0 0 0 0 0 0 0
FullNghs: 6850 ave 6850 max 6850 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 6850
Ave neighs/atom = 53.5156
Neighbor list builds = 1
Dangerous builds = 0
Total wall time: 0:00:11

View File

@ -0,0 +1,179 @@
LAMMPS (13 Feb 2017)
# Demonstrate SNAP Ta potential
# Initialize simulation
variable nsteps index 100
variable nrep equal 4
variable a equal 3.1803
units metal
# generate the box and atom positions using a BCC lattice
variable nx equal ${nrep}
variable nx equal 4
variable ny equal ${nrep}
variable ny equal 4
variable nz equal ${nrep}
variable nz equal 4
boundary p p p
lattice bcc $a
lattice bcc 3.1803
Lattice spacing in x,y,z = 3.1803 3.1803 3.1803
region box block 0 ${nx} 0 ${ny} 0 ${nz}
region box block 0 4 0 ${ny} 0 ${nz}
region box block 0 4 0 4 0 ${nz}
region box block 0 4 0 4 0 4
create_box 2 box
Created orthogonal box = (0 0 0) to (12.7212 12.7212 12.7212)
1 by 2 by 2 MPI processor grid
create_atoms 1 box
Created 128 atoms
mass 1 183.84
mass 2 4.0026
set group all type/fraction 2 0.05 3590153 # Change 5% of W to He
5 settings made for type/fraction
group tungsten type 1
123 atoms in group tungsten
group helium type 2
5 atoms in group helium
# choose potential
include W.SNAP_HePair.pot
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017, W-He and He-He from Juslin, N. and Wirth, B. D. Journal of Nuclear Materials, 423, (2013) p61-63
#
# Definition of SNAP+ZBL+Tabulated potential.
variable zblcutinner equal 4
variable zblcutouter equal 4.8
variable zblz equal 74
# Specify hybrid with SNAP, ZBL, and long-range Coulomb
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} snap table spline 10000 table spline 10000
pair_style hybrid/overlay zbl 4 ${zblcutouter} snap table spline 10000 table spline 10000
pair_style hybrid/overlay zbl 4 4.8 snap table spline 10000 table spline 10000
pair_coeff 1 1 zbl ${zblz} ${zblz}
pair_coeff 1 1 zbl 74 ${zblz}
pair_coeff 1 1 zbl 74 74
pair_coeff * * snap W_2940_2017_2.snapcoeff W W_2940_2017_2.snapparam W NULL
Reading potential file W_2940_2017_2.snapcoeff with DATE: 2017-02-20
SNAP Element = W, Radius 0.5, Weight 1
Reading potential file W_2940_2017_2.snapparam with DATE: 2017-02-20
SNAP keyword rcutfac 4.73442
SNAP keyword twojmax 8
SNAP keyword gamma 1
SNAP keyword rfac0 0.99363
SNAP keyword rmin0 0
SNAP keyword diagonalstyle 3
pair_coeff 2 2 table 1 He_He_JW2013.table HeHe
Reading potential file He_He_JW2013.table with DATE: 2017-02-20
WARNING: 1 of 4999 force values in table are inconsistent with -dE/dr.
Should only be flagged at inflection points (../pair_table.cpp:476)
pair_coeff 1 2 table 2 W_He_JW2013.table WHe
Reading potential file W_He_JW2013.table with DATE: 2017-02-20
WARNING: 3 of 325 force values in table are inconsistent with -dE/dr.
Should only be flagged at inflection points (../pair_table.cpp:476)
#Hybrid/overlay will take all pair styles and add their contributions equally, order of pair_coeff doesnt matter here
#This is not the case for pair_style hybrid ... where only one pair_coeff is read for each type combination, order matters here.
# Setup output
thermo 10
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Run MD
velocity all create 300.0 4928459
fix 1 all nve
run ${nsteps}
run 100
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 5.8
ghost atom cutoff = 5.8
binsize = 2.9, bins = 5 5 5
6 neighbor lists, perpetual/occasional/extra = 6 0 0
(1) pair zbl, perpetual, half/full from (2)
attributes: half, newton on
pair build: halffull/newton/skip
stencil: none
bin: none
(2) pair snap, perpetual, skip from (6)
attributes: full, newton on
pair build: skip
stencil: none
bin: none
(3) pair table, perpetual, skip from (5)
attributes: half, newton on
pair build: skip
stencil: none
bin: none
(4) pair table, perpetual, skip from (5)
attributes: half, newton on
pair build: skip
stencil: none
bin: none
(5) neighbor class addition, perpetual, half/full from (6)
attributes: half, newton on
pair build: halffull/newton
stencil: none
bin: none
(6) neighbor class addition, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
Memory usage per processor = 7.65426 Mbytes
Step Temp E_pair E_mol TotEng Press
0 300 -10.438105 0 -10.39963 -5445.2808
10 292.13979 -10.437097 0 -10.39963 -5516.3963
20 272.55728 -10.434585 0 -10.399629 -5460.4268
30 245.06559 -10.431059 0 -10.399629 -5016.6351
40 212.79459 -10.42692 0 -10.399629 -3924.2175
50 178.03903 -10.422462 0 -10.399629 -2354.5485
60 141.62155 -10.417791 0 -10.399628 -595.41345
70 107.24843 -10.413383 0 -10.399628 1138.4107
80 79.985938 -10.409886 0 -10.399628 2392.1106
90 62.568933 -10.407652 0 -10.399628 3141.7027
100 56.697933 -10.406899 0 -10.399628 3583.9538
Loop time of 2.8757 on 4 procs for 100 steps with 128 atoms
Performance: 1.502 ns/day, 15.976 hours/ns, 34.774 timesteps/s
99.9% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 2.7363 | 2.8122 | 2.8636 | 2.9 | 97.79
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.011014 | 0.062439 | 0.13842 | 19.3 | 2.17
Output | 0.00023842 | 0.00025076 | 0.0002861 | 0.0 | 0.01
Modify | 9.2506e-05 | 9.9301e-05 | 0.00010395 | 0.0 | 0.00
Other | | 0.0006654 | | | 0.02
Nlocal: 32 ave 32 max 32 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Nghost: 431 ave 431 max 431 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Neighs: 856.25 ave 885 max 818 min
Histogram: 1 0 0 0 1 0 0 0 1 1
FullNghs: 1712.5 ave 1738 max 1658 min
Histogram: 1 0 0 0 0 0 0 0 2 1
Total # of neighbors = 6850
Ave neighs/atom = 53.5156
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:02

View File

@ -1,5 +1,27 @@
# Change Log
## [2.02.15](https://github.com/kokkos/kokkos/tree/2.02.15) (2017-02-10)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.07...2.02.15)
**Implemented enhancements:**
- Containers: Adding block partitioning to StaticCrsGraph [\#625](https://github.com/kokkos/kokkos/issues/625)
- Kokkos Make System can induce Errors on Cray Volta System [\#610](https://github.com/kokkos/kokkos/issues/610)
- OpenMP: error out if KOKKOS\_HAVE\_OPENMP is defined but not \_OPENMP [\#605](https://github.com/kokkos/kokkos/issues/605)
- CMake: fix standalone build with tests [\#604](https://github.com/kokkos/kokkos/issues/604)
- Change README \(that GitHub shows when opening Kokkos project page\) to tell users how to submit PRs [\#597](https://github.com/kokkos/kokkos/issues/597)
- Add correctness testing for all operators of Atomic View [\#420](https://github.com/kokkos/kokkos/issues/420)
- Allow assignment of Views with compatible memory spaces [\#290](https://github.com/kokkos/kokkos/issues/290)
- Build only one version of Kokkos library for tests [\#213](https://github.com/kokkos/kokkos/issues/213)
- Clean out old KOKKOS\_HAVE\_CXX11 macros clauses [\#156](https://github.com/kokkos/kokkos/issues/156)
- Harmonize Macro names [\#150](https://github.com/kokkos/kokkos/issues/150)
**Fixed bugs:**
- Cray and PGI: Kokkos\_Parallel\_Reduce [\#634](https://github.com/kokkos/kokkos/issues/634)
- Kokkos Make System can induce Errors on Cray Volta System [\#610](https://github.com/kokkos/kokkos/issues/610)
- Normal\(\) function random number generator doesn't give the expected distribution [\#592](https://github.com/kokkos/kokkos/issues/592)
## [2.02.07](https://github.com/kokkos/kokkos/tree/2.02.07) (2016-12-16)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.01...2.02.07)

View File

@ -1,4 +1,3 @@
IF(COMMAND TRIBITS_PACKAGE_DECL)
SET(KOKKOS_HAS_TRILINOS ON CACHE BOOL "")
ELSE()
@ -8,6 +7,7 @@ ENDIF()
IF(NOT KOKKOS_HAS_TRILINOS)
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11 FATAL_ERROR)
INCLUDE(cmake/tribits.cmake)
SET(CMAKE_CXX_STANDARD 11)
ENDIF()
#

View File

@ -7,6 +7,8 @@
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Kokkos is licensed under 3-clause BSD terms of use:
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:

View File

@ -7,7 +7,7 @@ CXXFLAGS=$(CCFLAGS)
#Options: OpenMP,Serial,Pthreads,Cuda
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthreads"
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,KNL,BDW,SKX
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX
KOKKOS_ARCH ?= ""
#Options: yes,no
KOKKOS_DEBUG ?= "no"
@ -192,7 +192,8 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) |
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc))
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
#AMD based
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
@ -206,7 +207,7 @@ KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_US
# Decide what ISA level we are able to support
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc ))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
#Incompatible flags?
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc ))
@ -453,6 +454,17 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp )
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
else
# Assume that this is really a GNU compiler, or it could be XL on P9
KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)

View File

@ -5,6 +5,9 @@ Kokkos is designed to target complex node architectures with N-level memory
hierarchies and multiple types of execution resources. It currently can use
OpenMP, Pthreads and CUDA as backend programming models.
Kokkos is licensed under standard 3-clause BSD terms of use. For specifics
see the LICENSE file contained in the repository or distribution.
The core developers of Kokkos are Carter Edwards and Christian Trott
at the Computer Science Research Institute of the Sandia National
Laboratories.
@ -152,3 +155,11 @@ multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
This will enforce proper UVM allocations, but can lead to errors if
more than a single GPU is used by a single process.
===========================================================================
====Contributing===========================================================
===========================================================================
Contributions to Kokkos are welcome. To contribute, please open an issue
where the feature request or bug can be discussed, then issue a pull request
with your contribution. Pull requests must be issued against the develop branch.

View File

@ -1014,7 +1014,7 @@ namespace Kokkos {
}
};
#if defined(KOKKOS_HAVE_CUDA) && defined(__CUDACC__)
#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
template<>
class Random_XorShift1024<Kokkos::Cuda> {

View File

@ -49,7 +49,7 @@
#include <Kokkos_Core.hpp>
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <TestRandom.hpp>
#include <TestSort.hpp>
@ -106,5 +106,5 @@ CUDA_SORT_UNSIGNED(171)
#undef CUDA_SORT_UNSIGNED
}
#endif /* #ifdef KOKKOS_HAVE_CUDA */
#endif /* #ifdef KOKKOS_ENABLE_CUDA */

View File

@ -52,7 +52,7 @@
namespace Test {
#ifdef KOKKOS_HAVE_OPENMP
#ifdef KOKKOS_ENABLE_OPENMP
class openmp : public ::testing::Test {
protected:
static void SetUpTestCase()

View File

@ -55,7 +55,7 @@
namespace Test {
#ifdef KOKKOS_HAVE_SERIAL
#ifdef KOKKOS_ENABLE_SERIAL
class serial : public ::testing::Test {
protected:
static void SetUpTestCase()
@ -93,7 +93,7 @@ SERIAL_SORT_UNSIGNED(171)
#undef SERIAL_RANDOM_XORSHIFT1024
#undef SERIAL_SORT_UNSIGNED
#endif // KOKKOS_HAVE_SERIAL
#endif // KOKKOS_ENABLE_SERIAL
} // namespace Test

View File

@ -55,7 +55,7 @@
namespace Test {
#ifdef KOKKOS_HAVE_PTHREAD
#ifdef KOKKOS_ENABLE_PTHREAD
class threads : public ::testing::Test {
protected:
static void SetUpTestCase()

View File

@ -4,13 +4,31 @@ INCLUDE(CTest)
cmake_policy(SET CMP0054 NEW)
IF(NOT DEFINED ${PROJECT_NAME})
project(Kokkos)
project(KokkosCMake)
ENDIF()
IF(NOT DEFINED ${${PROJECT_NAME}_ENABLE_DEBUG}})
MESSAGE(WARNING "The project name is: ${PROJECT_NAME}")
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP)
SET(${PROJECT_NAME}_ENABLE_OpenMP OFF)
ENDIF()
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG)
SET(${PROJECT_NAME}_ENABLE_DEBUG OFF)
ENDIF()
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_CXX11)
SET(${PROJECT_NAME}_ENABLE_CXX11 ON)
ENDIF()
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_TESTS)
SET(${PROJECT_NAME}_ENABLE_TESTS OFF)
ENDIF()
IF(NOT DEFINED TPL_ENABLE_Pthread)
SET(TPL_ENABLE_Pthread OFF)
ENDIF()
FUNCTION(ASSERT_DEFINED VARS)
FOREACH(VAR ${VARS})
IF(NOT DEFINED ${VAR})
@ -70,9 +88,11 @@ ENDMACRO()
MACRO(TRIBITS_ADD_TEST_DIRECTORIES)
FOREACH(TEST_DIR ${ARGN})
ADD_SUBDIRECTORY(${TEST_DIR})
ENDFOREACH()
IF(${${PROJECT_NAME}_ENABLE_TESTS})
FOREACH(TEST_DIR ${ARGN})
ADD_SUBDIRECTORY(${TEST_DIR})
ENDFOREACH()
ENDIF()
ENDMACRO()
MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES)
@ -264,11 +284,11 @@ FUNCTION(TRIBITS_ADD_EXECUTABLE EXE_NAME)
SET(EXE_BINARY_NAME ${PACKAGE_NAME}_${EXE_BINARY_NAME})
ENDIF()
IF (PARSE_TESTONLY)
SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL")
ELSE()
SET(EXCLUDE_FROM_ALL_KEYWORD)
ENDIF()
# IF (PARSE_TESTONLY)
# SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL")
# ELSE()
# SET(EXCLUDE_FROM_ALL_KEYWORD)
# ENDIF()
ADD_EXECUTABLE(${EXE_BINARY_NAME} ${EXCLUDE_FROM_ALL_KEYWORD} ${EXE_SOURCES})
TARGET_LINK_AND_INCLUDE_LIBRARIES(${EXE_BINARY_NAME} ${LINK_LIBS})
@ -470,9 +490,8 @@ ENDMACRO(TRIBITS_SUBPACKAGE_POSTPROCESS)
MACRO(TRIBITS_PACKAGE_DECL NAME)
PROJECT(${NAME})
STRING(TOUPPER ${PROJECT_NAME} PROJECT_NAME_UC)
SET(PACKAGE_NAME ${PROJECT_NAME})
SET(PACKAGE_NAME ${NAME})
SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC)
SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps")
@ -489,7 +508,7 @@ MACRO(TRIBITS_PROCESS_SUBPACKAGES)
FOREACH(SUBPACKAGE ${SUBPACKAGES})
GET_FILENAME_COMPONENT(SUBPACKAGE_CMAKE ${SUBPACKAGE} DIRECTORY)
GET_FILENAME_COMPONENT(SUBPACKAGE_DIR ${SUBPACKAGE_CMAKE} DIRECTORY)
ADD_SUBDIRECTORY(${SUBPACKAGE_DIR})
ADD_SUBDIRECTORY(${CMAKE_BINARY_DIR}/../${SUBPACKAGE_DIR})
ENDFOREACH()
ENDMACRO(TRIBITS_PROCESS_SUBPACKAGES)

View File

@ -4,3 +4,4 @@ tag: 2.01.10 date: 09:27:2016 master: e4119325 develop: e6cda11e
tag: 2.02.00 date: 10:30:2016 master: 6c90a581 develop: ca3dd56e
tag: 2.02.01 date: 11:01:2016 master: 9c698c86 develop: b0072304
tag: 2.02.07 date: 12:16:2016 master: 4b4cc4ba develop: 382c0966
tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6

View File

@ -10,12 +10,18 @@ set -o pipefail
MACHINE=""
HOSTNAME=$(hostname)
PROCESSOR=`uname -p`
if [[ "$HOSTNAME" =~ (white|ride).* ]]; then
MACHINE=white
elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then
MACHINE=bowman
elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
MACHINE=shepard
if [[ "$PROCESSOR" = "aarch64" ]]; then
MACHINE=sullivan
else
MACHINE=shepard
fi
elif [[ "$HOSTNAME" =~ apollo ]]; then
MACHINE=apollo
elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
@ -27,6 +33,7 @@ fi
GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
ARM_GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
@ -200,6 +207,23 @@ elif [ "$MACHINE" = "bowman" ]; then
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=KNL"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=2
elif [ "$MACHINE" = "sullivan" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=96
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS")
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=ARMv8-ThunderX"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=2
elif [ "$MACHINE" = "shepard" ]; then
@ -297,7 +321,8 @@ echo " Defaults to root repo containing this script"
echo "--debug: Run tests in debug. Defaults to False"
echo "--test-script: Test this script, not Kokkos"
echo "--skip-hwloc: Do not do hwloc tests"
echo "--num=N: Number of jobs to run in parallel "
echo "--num=N: Number of jobs to run in parallel"
echo "--spot-check: Minimal test set to issue pull request"
echo "--dry-run: Just print what would be executed"
echo "--build-only: Just do builds, don't run anything"
echo "--opt-flag=FLAG: Optimization flag (default: -O3)"

View File

@ -0,0 +1,66 @@
#!/bin/bash
#
# Configure, build and test one Kokkos configuration in a fresh,
# timestamped build directory.
#
# Positional arguments:
#   $1     build-dir            prefix of the build directory ("-<timestamp>" is appended)
#   $2     backend              passed to generate_makefile.sh as --with-devices
#   $3     module               environment module loaded before configuring
#   $4     compiler             passed as --compiler
#   $5     cxxflags             passed as --cxxflags
#   $6     architecture         passed as --arch
#   $7     debug                the value "debug" adds --debug
#   $8     kokkos-options       passed as --with-options
#   $9     kokkos-cuda-options  passed as --with-cuda-options
#   ${10}  hwloc                the value "yes" adds --with-hwloc=${HWLOC_ROOT}
#
# The script finishes with the status of "make test".
. /etc/profile.d/modules.sh
echo "build-dir $1"
echo "backend $2"
echo "module $3"
echo "compiler $4"
echo "cxxflags $5"
echo "architecture $6"
echo "debug $7"
echo "kokkos-options $8"
echo "kokkos-cuda-options $9"
# Positional parameters above 9 need braces: "$10" expands to "$1" followed by "0".
echo "hwloc ${10}"
NOW=`date "+%Y%m%d%H%M%S"`
BASEDIR="$1-$NOW"
# Stop if the build directory cannot be entered; otherwise the relative
# ../generate_makefile.sh path below would resolve against the wrong directory.
if ! mkdir "$BASEDIR" || ! cd "$BASEDIR"; then
  echo "error: cannot create or enter build directory $BASEDIR" >&2
  return 1 2>/dev/null || exit 1
fi
# Load the requested module ($3); $2 is the Kokkos backend, not a module name.
module load "$3"
# Options shared by every configuration. Each one is quoted so that values
# containing spaces (typically cxxflags) reach the generator as one argument.
GENERATE_ARGS=(
  "--with-devices=$2"
  "--compiler=$4"
  "--cxxflags=$5"
  "--arch=$6"
)
# Request a debug build only when it was asked for.
if [ "$7" == "debug" ]; then
  GENERATE_ARGS+=("--debug")
fi
GENERATE_ARGS+=(
  "--with-options=$8"
  "--with-cuda-options=$9"
)
# hwloc support is selected by the tenth argument, not by the cuda options.
if [ "${10}" == "yes" ]; then
  GENERATE_ARGS+=("--with-hwloc=${HWLOC_ROOT}")
fi
../generate_makefile.sh "${GENERATE_ARGS[@]}"
make test
STATUS=$?
# "return" is only valid when this file is sourced; fall back to "exit" when
# it is executed directly so the caller still sees the status of "make test".
return "$STATUS" 2>/dev/null || exit "$STATUS"

View File

@ -27,13 +27,13 @@ cd ${TRILINOS_UPDATED_PATH}
echo ""
echo ""
echo "Trilinos State:"
git log --pretty=oneline --since=2.days
SHA=`git log --pretty=oneline --since=2.days | head -n 2 | tail -n 1 | awk '{print $1}'`
git log --pretty=oneline --since=7.days
SHA=`git log --pretty=oneline --since=7.days | head -n 2 | tail -n 1 | awk '{print $1}'`
cd ..
cd ${TRILINOS_PRISTINE_PATH}
git status
git log --pretty=oneline --since=2.days
git log --pretty=oneline --since=7.days
echo "Checkout develop"
git checkout develop
echo "Pull"
@ -46,5 +46,5 @@ cd ${TRILINOS_PRISTINE_PATH}
echo ""
echo ""
echo "Trilinos Pristine State:"
git log --pretty=oneline --since=2.days
git log --pretty=oneline --since=7.days
cd ..

View File

@ -52,7 +52,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <TestDynRankView.hpp>
@ -106,4 +106,4 @@ TEST_F( cuda, unordered_map_performance_far)
}
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */

View File

@ -164,12 +164,10 @@ struct UnorderedMapTest
};
//#define KOKKOS_COLLECT_UNORDERED_MAP_METRICS
template <typename Device, bool Near>
void run_performance_tests(std::string const & base_file_name)
{
#if defined(KOKKOS_COLLECT_UNORDERED_MAP_METRICS)
#if 0
std::string metrics_file_name = base_file_name + std::string("-metrics.csv");
std::string length_file_name = base_file_name + std::string("-length.csv");
std::string distance_file_name = base_file_name + std::string("-distance.csv");

View File

@ -586,13 +586,13 @@ private:
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
// rank of the calling operator - included as first argument in ARG
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds ARG ;
#else
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
#endif
@ -609,9 +609,9 @@ public:
reference_type operator()() const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) )
#endif
return implementation_map().reference();
//return m_map.reference(0,0,0,0,0,0,0);
@ -650,9 +650,9 @@ public:
operator()(const iType & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.reference(i0);
}
@ -663,9 +663,9 @@ public:
operator()(const iType & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.reference(i0,0,0,0,0,0,0);
}
@ -677,9 +677,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
#endif
return m_map.reference(i0,i1);
}
@ -690,9 +690,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
#endif
return m_map.reference(i0,i1,0,0,0,0,0);
}
@ -704,9 +704,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
#endif
return m_map.reference(i0,i1,i2);
}
@ -717,9 +717,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
#endif
return m_map.reference(i0,i1,i2,0,0,0,0);
}
@ -731,9 +731,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
#endif
return m_map.reference(i0,i1,i2,i3);
}
@ -744,9 +744,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
#endif
return m_map.reference(i0,i1,i2,i3,0,0,0);
}
@ -758,9 +758,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
#endif
return m_map.reference(i0,i1,i2,i3,i4);
}
@ -771,9 +771,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,0,0);
}
@ -785,9 +785,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5);
}
@ -798,9 +798,9 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5,0);
}
@ -812,14 +812,14 @@ public:
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
}
#undef KOKKOS_VIEW_OPERATOR_VERIFY
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
//----------------------------------------
// Standard constructor, destructor, and assignment operators...
@ -960,7 +960,7 @@ public:
alloc_prop prop( arg_prop );
//------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
// If allocating in CudaUVMSpace must fence before and after
// the allocation to protect against possible concurrent access
// on the CPU and the GPU.
@ -976,7 +976,7 @@ public:
record = m_map.allocate_shared( prop , Impl::DynRankDimTraits<typename traits::specialize>::createLayout(arg_layout) );
//------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
traits::device_type::memory_space::execution_space::fence();
}

View File

@ -51,6 +51,80 @@
namespace Kokkos {
namespace Impl {
/// \brief Functor that fills a row-block offset array from a CRS row-offset array.
///
/// Each call handles one row index \c iRow. The cost of a row is modelled as
/// its number of entries plus \c cost_per_row; the target cost of a block is
/// the total cost divided by \c num_blocks. The call writes
/// \c row_block_offsets(b) for every block boundary \c b that it decides falls
/// at this row. StaticCrsGraph::create_block_partitioning launches it with
/// Kokkos::parallel_for over the range [0, numRows()).
///
/// NOTE(review): no statement here explicitly targets row_block_offsets(0);
/// presumably the caller relies on the view's initial contents for that entry
/// -- confirm.
template<class RowOffsetsType, class RowBlockOffsetsType>
struct StaticCrsGraphBalancerFunctor {
// Integer type used for all row/entry/block arithmetic below.
typedef typename RowOffsetsType::non_const_value_type int_type;
// Input: row offsets; row i spans [row_offsets(i), row_offsets(i+1)).
RowOffsetsType row_offsets;
// Output: block boundaries expressed as row indices (indices up to num_blocks are written).
RowBlockOffsetsType row_block_offsets;
// Fixed cost added to every row, and the number of blocks to produce.
int_type cost_per_row, num_blocks;
// Stores the two views and the two scalar parameters; does no other work.
StaticCrsGraphBalancerFunctor(RowOffsetsType row_offsets_,
RowBlockOffsetsType row_block_offsets_,
int_type cost_per_row_, int_type num_blocks_):
row_offsets(row_offsets_),
row_block_offsets(row_block_offsets_),
cost_per_row(cost_per_row_),
num_blocks(num_blocks_){}
// Decide which block boundaries (if any) are located at row iRow and record them.
KOKKOS_INLINE_FUNCTION
void operator() (const int_type& iRow) const {
// The offsets array has one more entry than there are rows.
const int_type num_rows = row_offsets.dimension_0()-1;
// The last offset is the total number of entries.
const int_type num_entries = row_offsets(num_rows);
const int_type total_cost = num_entries + num_rows*cost_per_row;
// Target cost per block; kept as double so the divisions below are floating point.
const double cost_per_workset = 1.0*total_cost/num_blocks;
// Cost of this row alone: its entries plus the fixed per-row cost.
const int_type row_cost = row_offsets(iRow+1)-row_offsets(iRow) + cost_per_row;
// Running cost at the end of this row: all entries through iRow plus the
// fixed cost of the iRow rows before it.
int_type count = row_offsets(iRow+1) + cost_per_row*iRow;
// The last row always closes the final block.
if(iRow == num_rows-1) row_block_offsets(num_blocks) = num_rows;
if(true) {
// Block containing the cost position just before this row. Algebraically the
// numerator equals row_offsets(iRow) + cost_per_row*(iRow-2); the quotient is
// truncated when converted back to int_type.
// NOTE(review): for iRow < 2 with a nonzero cost_per_row the numerator can
// drop below zero, which wraps if int_type is unsigned -- confirm intended.
int_type current_block = (count-row_cost-cost_per_row)/cost_per_workset;
// Block containing the cost position at the end of this row.
int_type end_block = count/cost_per_workset;
// Handle some corner cases for the last two blocks.
if(current_block >= num_blocks-2) {
// Only the boundary between the second-to-last and the last block is
// handled here, and only by the row whose running cost crosses it.
if((current_block == num_blocks-2) && (count >= (current_block + 1) * cost_per_workset)) {
int_type row = iRow;
int_type cc = count-row_cost-cost_per_row;
int_type block = cc/cost_per_workset;
// Step backwards through earlier rows while their cost position still maps
// to current_block (and that block is not block 0); cc ends up holding the
// last cost position evaluated.
while((block>0) && (block==current_block)) {
cc = row_offsets(row)+row*cost_per_row;
block = cc/cost_per_workset;
row--;
}
// Compare the cost between cc and the start of this row against the number
// of entries remaining after this row: if the former is smaller the boundary
// is placed after this row, otherwise before it.
if((count-cc-row_cost-cost_per_row) < num_entries-row_offsets(iRow+1)) {
row_block_offsets(current_block+1) = iRow+1;
} else {
row_block_offsets(current_block+1) = iRow;
}
}
} else {
// General case: act only if this row's running cost reaches the next block
// boundary, or if this is the last row (iRow+2 == dimension_0 is the same
// condition as iRow == num_rows-1).
if((count >= (current_block + 1) * cost_per_workset) ||
(iRow+2 == row_offsets.dimension_0())) {
if(end_block>current_block+1) {
// This row alone crosses more than one boundary. The first boundary is
// placed before the row; of the remaining ones, those with an index below
// current_block+2+(num_block-1)/2 are also placed before the row and the
// rest after it.
int_type num_block = end_block-current_block;
row_block_offsets(current_block+1) = iRow;
for(int_type block = current_block+2; block <= end_block; block++)
if((block<current_block+2+(num_block-1)/2))
row_block_offsets(block) = iRow;
else
row_block_offsets(block) = iRow+1;
} else {
// Exactly one boundary is crossed: the next block starts after this row.
row_block_offsets(current_block+1) = iRow+1;
}
}
}
}
}
};
}
/// \class StaticCrsGraph
/// \brief Compressed row storage array.
///
@ -100,19 +174,23 @@ public:
typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror;
typedef View< const size_type* , array_layout, device_type > row_map_type;
typedef View< DataType* , array_layout, device_type > entries_type;
typedef View< const size_type* , array_layout, device_type > row_block_type;
entries_type entries;
row_map_type row_map;
row_block_type row_block_offsets;
//! Construct an empty view.
StaticCrsGraph () : entries(), row_map() {}
StaticCrsGraph () : entries(), row_map(), row_block_offsets() {}
//! Copy constructor (shallow copy).
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map)
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map),
row_block_offsets(rhs.row_block_offsets)
{}
template<class EntriesType, class RowMapType>
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_)
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_),
row_block_offsets()
{}
/** \brief Assign to a view of the rhs array.
@ -122,6 +200,7 @@ public:
// Assigns each of the three member views from the corresponding view of rhs
// and returns *this. No self-assignment check is performed.
StaticCrsGraph& operator= (const StaticCrsGraph& rhs) {
entries = rhs.entries;
row_map = rhs.row_map;
// Keep the block partitioning in step with the row map it was taken from.
row_block_offsets = rhs.row_block_offsets;
return *this;
}
@ -130,12 +209,30 @@ public:
*/
~StaticCrsGraph() {}
/** \brief Number of rows in the graph.
 *
 *  The row map stores one offset more than there are rows, so the result is
 *  the row map's extent minus one, or zero when the row map is empty.
 */
KOKKOS_INLINE_FUNCTION
size_type numRows() const {
  // An empty row map means an empty graph; avoid computing 0 - 1.
  if (row_map.dimension_0 () == 0) {
    return static_cast<size_type> (0);
  }
  return row_map.dimension_0 () - static_cast<size_type> (1);
}
/** \brief Partition the rows into \c num_blocks blocks.
 *
 *  The partitioning balances the number of non-zeros plus a fixed cost per
 *  row. On return \c row_block_offsets refers to a newly allocated view of
 *  \c num_blocks+1 entries filled by Impl::StaticCrsGraphBalancerFunctor.
 *
 *  \param num_blocks        number of row blocks to create
 *  \param fix_cost_per_row  fixed cost added to every row (default 4)
 */
void create_block_partitioning(size_type num_blocks, size_type fix_cost_per_row = 4) {
  typedef View< size_type* , array_layout, device_type > block_offsets_type;
  typedef Impl::StaticCrsGraphBalancerFunctor< row_map_type , block_offsets_type > balancer_type;

  // Writable scratch view; the member is a view of const and cannot be filled directly.
  block_offsets_type new_offsets("StatisCrsGraph::load_balance_offsets", num_blocks + 1);

  balancer_type balancer(row_map, new_offsets, fix_cost_per_row, num_blocks);

  // One functor call per row.
  Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, numRows()), balancer);

  // Wait for the kernel before publishing the result.
  Kokkos::fence();

  row_block_offsets = new_offsets;
}
};
//----------------------------------------------------------------------------

View File

@ -72,7 +72,7 @@ private:
public:
#ifdef KOKKOS_CUDA_USE_UVM
#ifdef KOKKOS_ENABLE_CUDA_UVM
KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);};
KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);};
#else

View File

@ -133,11 +133,11 @@ uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed )
defined( __GNUG__ ) /* GNU C++ */ || \
defined( __clang__ )
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
#define KOKKOS_IMPL_MAY_ALIAS __attribute__((__may_alias__))
#else
#define KOKKOS_MAY_ALIAS
#define KOKKOS_IMPL_MAY_ALIAS
#endif
@ -145,10 +145,10 @@ template <typename T>
KOKKOS_FORCEINLINE_FUNCTION
bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
{
typedef uint64_t KOKKOS_MAY_ALIAS T64;
typedef uint32_t KOKKOS_MAY_ALIAS T32;
typedef uint16_t KOKKOS_MAY_ALIAS T16;
typedef uint8_t KOKKOS_MAY_ALIAS T8;
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
enum {
NUM_8 = sizeof(T),
@ -188,7 +188,7 @@ bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
#undef KOKKOS_MAY_ALIAS
#undef KOKKOS_IMPL_MAY_ALIAS
}} // namespace Kokkos::Impl

View File

@ -69,15 +69,17 @@ create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view
typename staticcrsgraph_type::HostMirror tmp ;
typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map);
typename staticcrsgraph_type::row_block_type::HostMirror tmp_row_block_offsets = create_mirror( view.row_block_offsets);
// Allocation to match:
tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const'
tmp.entries = create_mirror( view.entries );
tmp.row_block_offsets = tmp_row_block_offsets ; // Assignment of 'const' from 'non-const'
// Deep copy:
deep_copy( tmp_row_map , view.row_map );
deep_copy( tmp.entries , view.entries );
deep_copy( tmp_row_block_offsets , view.row_block_offsets );
return tmp ;
}

View File

@ -69,7 +69,7 @@
//----------------------------------------------------------------------------
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
namespace Test {
@ -96,6 +96,18 @@ TEST_F( cuda , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Cuda >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Cuda >();
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 100000);
}
@ -225,5 +237,5 @@ TEST_F(cuda, ErrorReporter)
}
#endif /* #ifdef KOKKOS_HAVE_CUDA */
#endif /* #ifdef KOKKOS_ENABLE_CUDA */

View File

@ -1298,7 +1298,7 @@ public:
// For CUDA the constant random access View does not return
// an lvalue reference due to retrieving through texture cache
// therefore not allowed to query the underlying pointer.
#if defined(KOKKOS_HAVE_CUDA)
#if defined(KOKKOS_ENABLE_CUDA)
if ( ! std::is_same< typename device::execution_space , Kokkos::Cuda >::value )
#endif
{
@ -1408,7 +1408,7 @@ public:
ASSERT_EQ( ds5.dimension_4() , ds5plus.dimension_4() );
ASSERT_EQ( ds5.dimension_5() , ds5plus.dimension_5() );
#if ! defined( KOKKOS_HAVE_CUDA ) || defined ( KOKKOS_USE_CUDA_UVM )
#if ! defined( KOKKOS_ENABLE_CUDA ) || defined ( KOKKOS_ENABLE_CUDA_UVM )
ASSERT_EQ( & ds5(1,1,1,1,0) - & ds5plus(1,1,1,1,0) , 0 );
ASSERT_EQ( & ds5(1,1,1,1,0,0) - & ds5plus(1,1,1,1,0,0) , 0 ); // passing argument to rank beyond the view's rank is allowed iff it is a 0.
#endif

View File

@ -200,7 +200,7 @@ struct ErrorReporterDriverUseLambda : public ErrorReporterDriverBase<DeviceType>
#endif
#ifdef KOKKOS_HAVE_OPENMP
#ifdef KOKKOS_ENABLE_OPENMP
struct ErrorReporterDriverNativeOpenMP : public ErrorReporterDriverBase<Kokkos::OpenMP>
{
typedef ErrorReporterDriverBase<Kokkos::OpenMP> driver_base;

View File

@ -68,7 +68,7 @@
namespace Test {
#ifdef KOKKOS_HAVE_OPENMP
#ifdef KOKKOS_ENABLE_OPENMP
class openmp : public ::testing::Test {
protected:
static void SetUpTestCase()
@ -109,6 +109,18 @@ TEST_F( openmp , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::OpenMP >();
TestStaticCrsGraph::run_test_graph2< Kokkos::OpenMP >();
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 100000);
}
#define OPENMP_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \

View File

@ -45,7 +45,7 @@
#include <Kokkos_Core.hpp>
#if ! defined(KOKKOS_HAVE_SERIAL)
#if ! defined(KOKKOS_ENABLE_SERIAL)
# error "It doesn't make sense to build this file unless the Kokkos::Serial device is enabled. If you see this message, it probably means that there is an error in Kokkos' CMake build infrastructure."
#else
@ -91,6 +91,18 @@ TEST_F( serial , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Serial >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Serial >();
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 100000);
}
TEST_F( serial, complex )
@ -178,6 +190,6 @@ TEST_F(serial, ErrorReporter)
} // namespace Test
#endif // KOKKOS_HAVE_SERIAL
#endif // KOKKOS_ENABLE_SERIAL

View File

@ -144,6 +144,44 @@ void run_test_graph2()
}
}
/// \brief Check that StaticCrsGraph::create_block_partitioning does not build
///        grossly overloaded multi-row blocks.
///
/// Builds a graph of 2000 rows with pseudo-random lengths in [0, 1000) from a
/// fixed seed, forces the second and the second-to-last row to length \c N,
/// partitions the rows into \c B blocks with a fixed per-row cost of 0, and
/// asserts that no block spanning more than one row costs more than twice the
/// average block cost.
///
/// \param B  number of blocks to partition into (used as a divisor; must be nonzero)
/// \param N  length forced onto rows 1 and LENGTH-2
template< class Space >
void run_test_graph3(size_t B, size_t N)
{
  // Fixed seed so every run sees the same row lengths.
  srand(10310);

  typedef Kokkos::StaticCrsGraph< int , Space > dView ;
  typedef typename dView::HostMirror hView ;

  const unsigned LENGTH = 2000 ;

  std::vector< size_t > sizes( LENGTH );

  for ( size_t i = 0 ; i < LENGTH ; ++i ) {
    sizes[i] = rand()%1000;
  }

  // Place one row of length N near each end of the graph.
  sizes[1] = N;
  sizes[LENGTH-2] = N;

  // Fixed cost per row handed to the partitioner and mirrored in the check below.
  int C = 0;
  dView dx = Kokkos::create_staticcrsgraph<dView>( "test" , sizes );
  dx.create_block_partitioning(B,C);
  hView hx = Kokkos::create_mirror( dx );

  // Twice the average cost per block; identical for every block, so computed once.
  const size_t max_block_cost = 2*((hx.row_map(hx.numRows())+C*hx.numRows())/B);

  for( size_t i = 0; i<B; i++) {
    // Cost of block i: entries of all its rows plus the fixed cost per row.
    size_t ne = 0;
    for(size_t j = hx.row_block_offsets(i); j<hx.row_block_offsets(i+1); j++)
      ne += hx.row_map(j+1)-hx.row_map(j)+C;

    // A block holding a single row may exceed the bound (one row cannot be
    // split); any larger block must stay within it.
    ASSERT_FALSE((ne>max_block_cost)&&(hx.row_block_offsets(i+1)>hx.row_block_offsets(i)+1));
  }
}
} /* namespace TestStaticCrsGraph */

View File

@ -45,7 +45,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
#include <Kokkos_Bitset.hpp>
#include <Kokkos_UnorderedMap.hpp>
@ -106,6 +106,18 @@ TEST_F( threads , staticcrsgraph )
{
TestStaticCrsGraph::run_test_graph< Kokkos::Threads >();
TestStaticCrsGraph::run_test_graph2< Kokkos::Threads >();
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 100000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 0);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 1000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 10000);
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 100000);
}
/*TEST_F( threads, bitset )
@ -190,5 +202,5 @@ TEST_F(threads, ErrorReporter)
} // namespace Test
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */

View File

@ -48,7 +48,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <impl/Kokkos_Timer.hpp>
@ -185,5 +185,5 @@ TEST_F( cuda, texture_double )
} // namespace Test
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */

View File

@ -51,8 +51,8 @@
// macro, so I'm commenting out the macro to avoid compiler complaints
// about an unused macro.
// #define KOKKOS_MACRO_IMPL_TO_STRING( X ) #X
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_MACRO_IMPL_TO_STRING( X )
// #define KOKKOS_IMPL_MACRO_TO_STRING( X ) #X
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_IMPL_MACRO_TO_STRING( X )
//------------------------------------------------------------------------

View File

@ -45,17 +45,17 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
typedef Kokkos::OpenMP TestHostDevice ;
const char TestHostDeviceName[] = "Kokkos::OpenMP" ;
#elif defined( KOKKOS_HAVE_PTHREAD )
#elif defined( KOKKOS_ENABLE_PTHREAD )
typedef Kokkos::Threads TestHostDevice ;
const char TestHostDeviceName[] = "Kokkos::Threads" ;
#elif defined( KOKKOS_HAVE_SERIAL )
#elif defined( KOKKOS_ENABLE_SERIAL )
typedef Kokkos::Serial TestHostDevice ;
const char TestHostDeviceName[] = "Kokkos::Serial" ;

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <string>
#include <Kokkos_Parallel.hpp>
@ -112,7 +112,7 @@ CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type
#if defined( __CUDACC__ )
/** \brief Access to constant memory on the device */
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
__device__ __constant__
extern unsigned long kokkos_impl_cuda_constant_memory_buffer[] ;
@ -135,7 +135,7 @@ namespace Impl {
}
}
__device__ __constant__
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
extern
#endif
Kokkos::Impl::CudaLockArraysStruct kokkos_impl_cuda_lock_arrays ;
@ -245,7 +245,7 @@ struct CudaParallelLaunch< DriverType , true > {
// Copy functor to constant memory on the device
cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) );
#ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
Kokkos::Impl::CudaLockArraysStruct locks;
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
@ -287,7 +287,7 @@ struct CudaParallelLaunch< DriverType , false > {
}
#endif
#ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
Kokkos::Impl::CudaLockArraysStruct locks;
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
@ -314,5 +314,5 @@ struct CudaParallelLaunch< DriverType , false > {
//----------------------------------------------------------------------------
#endif /* defined( __CUDACC__ ) */
#endif /* defined( KOKKOS_HAVE_CUDA ) */
#endif /* defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */

View File

@ -50,7 +50,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Kokkos_Core.hpp>
#include <Kokkos_Cuda.hpp>
@ -910,5 +910,5 @@ void* cuda_resize_scratch_space(size_t bytes, bool force_shrink) {
}
}
#endif // KOKKOS_HAVE_CUDA
#endif // KOKKOS_ENABLE_CUDA

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <impl/Kokkos_Traits.hpp>
@ -176,7 +176,7 @@ public:
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif //KOKKOS_ENABLE_CUDA
#endif // #ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
namespace Kokkos { namespace Impl {
@ -65,5 +65,5 @@ inline void cuda_internal_safe_call( cudaError e , const char * name, const char
}} // namespace Kokkos::Impl
#endif //KOKKOS_HAVE_CUDA
#endif //KOKKOS_ENABLE_CUDA
#endif //KOKKOS_CUDA_ERROR_HPP

View File

@ -47,7 +47,7 @@
#include <Kokkos_Core.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Cuda/Kokkos_Cuda_Error.hpp>
#include <Cuda/Kokkos_Cuda_Internal.hpp>
@ -64,7 +64,7 @@
#include <sstream>
#include <string>
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
__device__ __constant__
unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ;
@ -299,8 +299,8 @@ void CudaInternal::print_configuration( std::ostream & s ) const
{
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
#if defined( KOKKOS_HAVE_CUDA )
s << "macro KOKKOS_HAVE_CUDA : defined" << std::endl ;
#if defined( KOKKOS_ENABLE_CUDA )
s << "macro KOKKOS_ENABLE_CUDA : defined" << std::endl ;
#endif
#if defined( CUDA_VERSION )
s << "macro CUDA_VERSION = " << CUDA_VERSION
@ -500,7 +500,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
Kokkos::Impl::throw_runtime_exception( msg.str() );
}
#ifdef KOKKOS_CUDA_USE_UVM
#ifdef KOKKOS_ENABLE_CUDA_UVM
if(!cuda_launch_blocking()) {
std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl;
std::cout << " without setting CUDA_LAUNCH_BLOCKING=1." << std::endl;
@ -531,7 +531,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
// Init the array used for arbitrarily sized atomics
Impl::init_lock_arrays_cuda_space();
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
Kokkos::Impl::CudaLockArraysStruct locks;
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
@ -773,6 +773,6 @@ void Cuda::fence()
} // namespace Kokkos
#endif // KOKKOS_HAVE_CUDA
#endif // KOKKOS_ENABLE_CUDA
//----------------------------------------------------------------------------

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Cuda/Kokkos_Cuda_Error.hpp>
@ -197,6 +197,6 @@ struct CudaGetOptBlockSize<DriverType,false> {
}} // namespace Kokkos::Impl
#endif // KOKKOS_HAVE_CUDA
#endif // KOKKOS_ENABLE_CUDA
#endif /* #ifndef KOKKOS_CUDA_INTERNAL_HPP */

View File

@ -51,7 +51,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
#include <utility>
#include <Kokkos_Parallel.hpp>

View File

@ -47,7 +47,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
#include <utility>
@ -312,7 +312,7 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
( rtid_intra & 16 ) ? 16 : 0 ))));
if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ;
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
BLOCK_SCAN_STEP(tdata_intra,n,4) __syncthreads();//__threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,3) __syncthreads();//__threadfence_block();
BLOCK_SCAN_STEP(tdata_intra,n,2) __syncthreads();//__threadfence_block();

View File

@ -43,7 +43,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
#if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
#include <impl/Kokkos_TaskQueue_impl.hpp>
@ -174,6 +174,6 @@ printf("cuda_task_queue_execute after\n");
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */

View File

@ -46,7 +46,7 @@
#include <Kokkos_Macros.hpp>
/* only compile this file if CUDA is enabled for Kokkos */
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
#include <Kokkos_Cuda.hpp>
@ -294,5 +294,5 @@ namespace Impl {
}
#endif // KOKKOS_HAVE_CUDA
#endif // KOKKOS_ENABLE_CUDA
#endif

View File

@ -45,7 +45,7 @@
#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
/* only compile this file if CUDA is enabled for Kokkos */
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
@ -144,7 +144,7 @@ struct CudaTextureFetch {
{}
};
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
#if defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
template< typename ValueType , typename AliasType >
struct CudaLDGFetch {
@ -261,7 +261,7 @@ public:
>::type
>::type ;
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
#if defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ;
#else
using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ;
@ -301,6 +301,6 @@ public:
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */

View File

@ -47,7 +47,7 @@
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#include "Kokkos_Macros.hpp"
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
#include <cuda.h>
@ -82,6 +82,6 @@ void cuda_abort( const char * const message )
} // namespace Impl
} // namespace Kokkos
#endif /* #if defined(__CUDACC__) && defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined(__CUDACC__) && defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */

View File

@ -48,8 +48,8 @@
#include <Kokkos_Parallel.hpp>
#include <initializer_list>
#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
#define KOKKOS_MDRANGE_IVDEP
#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_ENABLE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
#define KOKKOS_IMPL_MDRANGE_IVDEP
#endif
namespace Kokkos { namespace Experimental {
@ -350,7 +350,7 @@ struct MDForFunctor
if ( MDRange::inner_direction == MDRange::Right ) {
for (int i0=b0; i0<e0; ++i0) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i1=b1; i1<e1; ++i1) {
@ -358,7 +358,7 @@ struct MDForFunctor
}}
} else {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i0=b0; i0<e0; ++i0) {
@ -396,7 +396,7 @@ struct MDForFunctor
if ( MDRange::inner_direction == MDRange::Right ) {
for (int i0=b0; i0<e0; ++i0) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i1=b1; i1<e1; ++i1) {
@ -404,7 +404,7 @@ struct MDForFunctor
}}
} else {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i0=b0; i0<e0; ++i0) {
@ -501,7 +501,7 @@ struct MDForFunctor
if ( MDRange::inner_direction == MDRange::Right ) {
for (int i0=b0; i0<e0; ++i0) {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i2=b2; i2<e2; ++i2) {
@ -510,7 +510,7 @@ struct MDForFunctor
} else {
for (int i2=b2; i2<e2; ++i2) {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i0=b0; i0<e0; ++i0) {
@ -555,7 +555,7 @@ struct MDForFunctor
if ( MDRange::inner_direction == MDRange::Right ) {
for (int i0=b0; i0<e0; ++i0) {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i2=b2; i2<e2; ++i2) {
@ -564,7 +564,7 @@ struct MDForFunctor
} else {
for (int i2=b2; i2<e2; ++i2) {
for (int i1=b1; i1<e1; ++i1) {
#if defined(KOKKOS_MDRANGE_IVDEP)
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
#pragma ivdep
#endif
for (int i0=b0; i0<e0; ++i0) {

View File

@ -41,8 +41,8 @@
//@HEADER
*/
#ifndef KOKKOS_ARRAY
#define KOKKOS_ARRAY
#ifndef KOKKOS_ARRAY_HPP
#define KOKKOS_ARRAY_HPP
#include <type_traits>
#include <algorithm>
@ -298,5 +298,5 @@ public:
} // namespace Kokkos
#endif /* #ifndef KOKKOS_ARRAY */
#endif /* #ifndef KOKKOS_ARRAY_HPP */

View File

@ -73,18 +73,18 @@
//----------------------------------------------------------------------------
#if defined(_WIN32)
#define KOKKOS_ATOMICS_USE_WINDOWS
#define KOKKOS_ENABLE_WINDOWS_ATOMICS
#else
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
// Compiling NVIDIA device code, must use Cuda atomics:
#define KOKKOS_ATOMICS_USE_CUDA
#define KOKKOS_ENABLE_CUDA_ATOMICS
#endif
#if ! defined( KOKKOS_ATOMICS_USE_GCC ) && \
! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
! defined( KOKKOS_ATOMICS_USE_OMP31 )
#if ! defined( KOKKOS_ENABLE_GNU_ATOMICS ) && \
! defined( KOKKOS_ENABLE_INTEL_ATOMICS ) && \
! defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
// Compiling for non-Cuda atomic implementation has not been pre-selected.
// Choose the best implementation for the detected compiler.
@ -94,16 +94,16 @@
defined( KOKKOS_COMPILER_CLANG ) || \
( defined ( KOKKOS_COMPILER_NVCC ) )
#define KOKKOS_ATOMICS_USE_GCC
#define KOKKOS_ENABLE_GNU_ATOMICS
#elif defined( KOKKOS_COMPILER_INTEL ) || \
defined( KOKKOS_COMPILER_CRAYC )
#define KOKKOS_ATOMICS_USE_INTEL
#define KOKKOS_ENABLE_INTEL_ATOMICS
#elif defined( _OPENMP ) && ( 201107 <= _OPENMP )
#define KOKKOS_ATOMICS_USE_OMP31
#define KOKKOS_ENABLE_OPENMP_ATOMICS
#else
@ -119,7 +119,7 @@
// Forward declaration of functions supporting arbitrarily sized atomics
// This is necessary since Kokkos_Atomic.hpp is internally included very early
// through Kokkos_HostSpace.hpp as well as the allocation tracker.
#ifdef KOKKOS_HAVE_CUDA
#ifdef KOKKOS_ENABLE_CUDA
namespace Kokkos {
namespace Impl {
/// \brief Acquire a lock for the address
@ -127,7 +127,7 @@ namespace Impl {
/// This function tries to acquire the lock for the hash value derived
/// from the provided ptr. If the lock is successfully acquired the
/// function returns true. Otherwise it returns false.
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
extern
#endif
__device__ inline
@ -139,7 +139,7 @@ bool lock_address_cuda_space(void* ptr);
/// from the provided ptr. This function should only be called
/// after previously successfully acquiring a lock with
/// lock_address.
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
extern
#endif
__device__ inline
@ -170,16 +170,16 @@ namespace Kokkos {
inline
const char * atomic_query_version()
{
#if defined( KOKKOS_ATOMICS_USE_CUDA )
return "KOKKOS_ATOMICS_USE_CUDA" ;
#elif defined( KOKKOS_ATOMICS_USE_GCC )
return "KOKKOS_ATOMICS_USE_GCC" ;
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
return "KOKKOS_ATOMICS_USE_INTEL" ;
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
return "KOKKOS_ATOMICS_USE_OMP31" ;
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
return "KOKKOS_ATOMICS_USE_WINDOWS";
#if defined( KOKKOS_ENABLE_CUDA_ATOMICS )
return "KOKKOS_ENABLE_CUDA_ATOMICS" ;
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS )
return "KOKKOS_ENABLE_GNU_ATOMICS" ;
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
return "KOKKOS_ENABLE_INTEL_ATOMICS" ;
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
return "KOKKOS_ENABLE_OPENMP_ATOMICS" ;
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
return "KOKKOS_ENABLE_WINDOWS_ATOMICS";
#endif
}

View File

@ -185,15 +185,15 @@ public:
typedef typename std::conditional
< std::is_same< memory_space , Kokkos::HostSpace >::value
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
|| std::is_same< memory_space , Kokkos::CudaUVMSpace >::value
|| std::is_same< memory_space , Kokkos::CudaHostPinnedSpace >::value
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
, memory_space
, Kokkos::HostSpace
>::type host_memory_space ;
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
typedef typename std::conditional
< std::is_same< execution_space , Kokkos::Cuda >::value
, Kokkos::DefaultHostExecutionSpace , execution_space

View File

@ -49,19 +49,19 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
#include <Kokkos_Serial.hpp>
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
#include <Kokkos_OpenMP.hpp>
#endif
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
#include <Kokkos_Threads.hpp>
#endif
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <Kokkos_Cuda.hpp>
#endif
@ -74,9 +74,7 @@
#include <Kokkos_hwloc.hpp>
#include <Kokkos_Timer.hpp>
#ifdef KOKKOS_HAVE_CXX11
#include <Kokkos_Complex.hpp>
#endif
//----------------------------------------------------------------------------

View File

@ -83,25 +83,25 @@ namespace Kokkos {
class HostSpace ; ///< Memory space for main process and CPU execution spaces
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
namespace Experimental {
class HBWSpace ; /// Memory space for hbw_malloc from memkind (e.g. for KNL processor)
}
#endif
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
class Serial ; ///< Execution space main process on CPU
#endif // defined( KOKKOS_HAVE_SERIAL )
#endif // defined( KOKKOS_ENABLE_SERIAL )
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
class Threads ; ///< Execution space with pthreads back-end
#endif
#if defined( KOKKOS_HAVE_OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP )
class OpenMP ; ///< OpenMP execution space
#endif
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
class CudaSpace ; ///< Memory space on Cuda GPU
class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM
class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU
@ -122,29 +122,29 @@ struct Device;
namespace Kokkos {
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
typedef Cuda DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Threads DefaultExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Serial DefaultExecutionSpace ;
#else
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
#endif
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Threads DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
typedef Serial DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_OPENMP )
#elif defined ( KOKKOS_ENABLE_OPENMP )
typedef OpenMP DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_PTHREAD )
#elif defined ( KOKKOS_ENABLE_PTHREAD )
typedef Threads DefaultHostExecutionSpace ;
#elif defined ( KOKKOS_HAVE_SERIAL )
#elif defined ( KOKKOS_ENABLE_SERIAL )
typedef Serial DefaultHostExecutionSpace ;
#else
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
@ -161,7 +161,7 @@ namespace Kokkos {
namespace Kokkos {
namespace Impl {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_HAVE_CUDA)
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_ENABLE_CUDA)
typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ;
#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
typedef Kokkos::HostSpace ActiveExecutionMemorySpace ;

View File

@ -48,7 +48,7 @@
// If CUDA execution space is enabled then use this header file.
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <iosfwd>
#include <vector>
@ -94,7 +94,7 @@ public:
//! Tag this class as a kokkos execution space
typedef Cuda execution_space ;
#if defined( KOKKOS_USE_CUDA_UVM )
#if defined( KOKKOS_ENABLE_CUDA_UVM )
//! This execution space's preferred memory space.
typedef CudaUVMSpace memory_space ;
#else
@ -240,7 +240,7 @@ struct MemorySpaceAccess
enum { deepcopy = false };
};
#if defined( KOKKOS_USE_CUDA_UVM )
#if defined( KOKKOS_ENABLE_CUDA_UVM )
// If forcing use of UVM everywhere
// then must assume that CudaUVMSpace
@ -297,7 +297,7 @@ struct VerifyExecutionCanAccessMemorySpace
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_HPP */

View File

@ -46,7 +46,7 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <iosfwd>
#include <typeinfo>
@ -939,6 +939,6 @@ public:
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #define KOKKOS_CUDASPACE_HPP */

View File

@ -48,7 +48,7 @@
#include <Kokkos_HostSpace.hpp>
/*--------------------------------------------------------------------------*/
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
namespace Kokkos {
namespace Experimental {
@ -102,15 +102,15 @@ public:
/// Every memory space has a default execution space. This is
/// useful for things like initializing a View (which happens in
/// parallel using the View's default execution space).
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_OPENMP )
#elif defined( KOKKOS_ENABLE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_PTHREAD )
#elif defined( KOKKOS_ENABLE_PTHREAD )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_SERIAL )
#elif defined( KOKKOS_ENABLE_SERIAL )
typedef Kokkos::Serial execution_space ;
#else
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."

View File

@ -108,15 +108,15 @@ public:
/// Every memory space has a default execution space. This is
/// useful for things like initializing a View (which happens in
/// parallel using the View's default execution space).
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_OPENMP )
#elif defined( KOKKOS_ENABLE_OPENMP )
typedef Kokkos::OpenMP execution_space ;
#elif defined( KOKKOS_HAVE_PTHREAD )
#elif defined( KOKKOS_ENABLE_PTHREAD )
typedef Kokkos::Threads execution_space ;
#elif defined( KOKKOS_HAVE_SERIAL )
#elif defined( KOKKOS_ENABLE_SERIAL )
typedef Kokkos::Serial execution_space ;
#else
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."

View File

@ -47,23 +47,24 @@
//----------------------------------------------------------------------------
/** Pick up configure/build options via #define macros:
*
* KOKKOS_HAVE_CUDA Kokkos::Cuda execution and memory spaces
* KOKKOS_HAVE_PTHREAD Kokkos::Threads execution space
* KOKKOS_HAVE_QTHREAD Kokkos::Qthread execution space
* KOKKOS_HAVE_OPENMP Kokkos::OpenMP execution space
* KOKKOS_HAVE_HWLOC HWLOC library is available
* KOKKOS_ENABLE_CUDA Kokkos::Cuda execution and memory spaces
* KOKKOS_ENABLE_PTHREAD Kokkos::Threads execution space
* KOKKOS_ENABLE_QTHREAD Kokkos::Qthread execution space
* KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space
* KOKKOS_ENABLE_HWLOC HWLOC library is available
* KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK insert array bounds checks, is expensive!
* KOKKOS_HAVE_CXX11 enable C++11 features
*
* KOKKOS_HAVE_MPI negotiate MPI/execution space interactions
* KOKKOS_ENABLE_MPI negotiate MPI/execution space interactions
*
* KOKKOS_USE_CUDA_UVM Use CUDA UVM for Cuda memory space
* KOKKOS_ENABLE_CUDA_UVM Use CUDA UVM for Cuda memory space
*/
#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
#include <KokkosCore_config.h>
#endif
#include <impl/Kokkos_OldMacros.hpp>
//----------------------------------------------------------------------------
/** Pick up compiler specific #define macros:
*
@ -80,10 +81,10 @@
*
* Macros for which compiler extension to use for atomics on intrinsic types
*
* KOKKOS_ATOMICS_USE_CUDA
* KOKKOS_ATOMICS_USE_GNU
* KOKKOS_ATOMICS_USE_INTEL
* KOKKOS_ATOMICS_USE_OPENMP31
* KOKKOS_ENABLE_CUDA_ATOMICS
* KOKKOS_ENABLE_GNU_ATOMICS
* KOKKOS_ENABLE_INTEL_ATOMICS
* KOKKOS_ENABLE_OPENMP_ATOMICS
*
* A suite of 'KOKKOS_ENABLE_PRAGMA_...' macros is defined for internal use.
*
@ -96,7 +97,7 @@
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ )
#if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
/* Compiling with a CUDA compiler.
*
@ -126,7 +127,7 @@
#error "Cuda device capability >= 3.0 is required"
#endif
#ifdef KOKKOS_CUDA_USE_LAMBDA
#ifdef KOKKOS_ENABLE_CUDA_LAMBDA
#if ( CUDA_VERSION < 7050 )
// CUDA supports C++11 lambdas generated in host code to be given
// to the device starting with version 7.5. But the release candidate (7.5.6)
@ -137,18 +138,18 @@
#define KOKKOS_LAMBDA [=]__device__
#else
#define KOKKOS_LAMBDA [=]__host__ __device__
#if defined( KOKKOS_HAVE_CXX1Z )
#if defined( KOKKOS_ENABLE_CXX1Z )
#define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__
#endif
#endif
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
#endif
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ ) */
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) */
#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
// Cuda version 8.0 still needs the functor wrapper
#if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
#if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
#define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
#endif
#endif
@ -156,7 +157,7 @@
/*--------------------------------------------------------------------------*/
/* Language info: C++, CUDA, OPENMP */
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
// Compiling Cuda code to 'ptx'
#define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
@ -185,21 +186,21 @@
#define KOKKOS_COMPILER_NVCC __NVCC__
#else
#if defined( KOKKOS_HAVE_CXX11 ) && ! defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA )
#if !defined (KOKKOS_HAVE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
#if ! defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
#if !defined (KOKKOS_ENABLE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
// CUDA (including version 6.5) does not support giving lambdas as
// arguments to global functions. Thus it's not currently possible
// to dispatch lambdas from the host.
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
#endif
#endif
#endif /* #if defined( __NVCC__ ) */
#if defined( KOKKOS_HAVE_CXX11 ) && !defined (KOKKOS_LAMBDA)
#if !defined (KOKKOS_LAMBDA)
#define KOKKOS_LAMBDA [=]
#endif
#if defined( KOKKOS_HAVE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
#if defined( KOKKOS_ENABLE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
#define KOKKOS_CLASS_LAMBDA [=,*this]
#endif
@ -259,11 +260,11 @@
#if defined( KOKKOS_COMPILER_INTEL )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
#define KOKKOS_HAVE_PRAGMA_SIMD 1
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#define KOKKOS_RESTRICT __restrict__
@ -317,11 +318,11 @@
#if defined( KOKKOS_COMPILER_IBM )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#endif
@ -330,11 +331,11 @@
#if defined( KOKKOS_COMPILER_CLANG )
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
//#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
@ -347,11 +348,11 @@
#if defined( KOKKOS_COMPILER_GNU )
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
//#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
@ -371,11 +372,11 @@
#if defined( KOKKOS_COMPILER_PGI )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#endif
@ -384,7 +385,7 @@
#if defined( KOKKOS_COMPILER_NVCC )
#if defined(__CUDA_ARCH__ )
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
#endif
#endif
@ -426,19 +427,15 @@
#define KOKKOS_ALIGN_PTR(size) __attribute__((aligned(size)))
#endif
#if ! defined(KOKKOS_ALIGN_16)
#define KOKKOS_ALIGN_16 KOKKOS_ALIGN(16)
#endif
//----------------------------------------------------------------------------
/** Determine the default execution space for parallel dispatch.
* There is zero or one default execution space specified.
*/
#if 1 < ( ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
#if 1 < ( ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
#error "More than one KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* specified" ;
@ -447,24 +444,24 @@
/** If default is not specified then choose from enabled execution spaces.
* Priority: CUDA, OPENMP, THREADS, SERIAL
*/
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_HAVE_CUDA )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
#elif defined ( KOKKOS_HAVE_OPENMP )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
#elif defined ( KOKKOS_HAVE_PTHREAD )
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined ( KOKKOS_ENABLE_CUDA )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
#elif defined ( KOKKOS_ENABLE_OPENMP )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP
#elif defined ( KOKKOS_ENABLE_PTHREAD )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS
#else
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
#endif
//----------------------------------------------------------------------------
/** Determine for what space the code is being compiled: */
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_HAVE_CUDA)
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_ENABLE_CUDA)
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
#else
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
@ -476,7 +473,7 @@
#if ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 )
#if defined(KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN)
#define KOKKOS_POSIX_MEMALIGN_AVAILABLE 1
#define KOKKOS_ENABLE_POSIX_MEMALIGN 1
#endif
#endif
@ -489,15 +486,6 @@
#define KOKKOS_ENABLE_PROFILING 1
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/* Transitional macro to change between old and new View
* are no longer supported.
*/
#define KOKKOS_USING_EXP_VIEW 1
#define KOKKOS_USING_EXPERIMENTAL_VIEW
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------

View File

@ -57,18 +57,18 @@
// How should errors be handled? In general, production code should return a
// value indicating failure so the user can decide how the error is handled.
// While experimental, code can abort instead. If KOKKOS_MEMPOOL_PRINTERR is
// While experimental, code can abort instead. If KOKKOS_ENABLE_MEMPOOL_PRINTERR is
// defined, the code will abort with an error message. Otherwise, the code will
// return with a value indicating failure when possible, or do nothing instead.
//#define KOKKOS_MEMPOOL_PRINTERR
//#define KOKKOS_ENABLE_MEMPOOL_PRINTERR
//#define KOKKOS_MEMPOOL_PRINT_INFO
//#define KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
//#define KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
//#define KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
//#define KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
//#define KOKKOS_MEMPOOL_PRINT_PAGE_INFO
//#define KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
//----------------------------------------------------------------------------
@ -451,7 +451,7 @@ struct create_histogram {
}
};
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
template < typename UInt32View, typename SBHeaderView, typename MempoolBitset >
struct count_allocated_blocks {
typedef typename UInt32View::execution_space execution_space;
@ -790,7 +790,7 @@ public:
}
}
#ifdef KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
printf( "\n" );
printf( " m_lg_sb_size: %12lu\n", m_lg_sb_size );
printf( " m_sb_size: %12lu\n", m_sb_size );
@ -810,7 +810,7 @@ public:
fflush( stdout );
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
// Print the blocksize info for all the block sizes.
printf( "SIZE BLOCKS_PER_SB PAGES_PER_SB SB_FULL_LEVEL PAGE_FULL_LEVEL\n" );
for ( size_t i = 0; i < m_num_block_size; ++i ) {
@ -845,7 +845,7 @@ public:
uint32_t blocks_per_sb = m_blocksize_info[block_size_id].m_blocks_per_sb;
uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb;
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
// Without this test it looks like pages_per_sb might come back wrong.
if ( pages_per_sb == 0 ) return NULL;
#endif
@ -966,7 +966,7 @@ public:
if ( new_sb_id == sb_id ) {
allocation_done = true;
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
printf( "** No superblocks available. **\n" );
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
fflush( stdout );
@ -979,7 +979,7 @@ public:
}
}
}
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
else {
printf( "** Requested allocation size (%zu) larger than superblock size (%lu). **\n",
alloc_size, m_sb_size );
@ -1068,7 +1068,7 @@ public:
}
}
}
#ifdef KOKKOS_MEMPOOL_PRINTERR
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
else {
printf( "\n** MemoryPool::deallocate() ADDRESS_OUT_OF_RANGE(0x%llx) **\n",
reinterpret_cast<uint64_t>( alloc_ptr ) );
@ -1109,7 +1109,7 @@ public:
{
printf( "\n" );
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
typename SBHeaderView::HostMirror host_sb_header = create_mirror_view( m_sb_header );
deep_copy( host_sb_header, m_sb_header );
@ -1188,7 +1188,7 @@ public:
num_active_sb += host_active(i) != INVALID_SUPERBLOCK;
}
#ifdef KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
// Print active superblocks.
printf( "BS_ID SB_ID\n" );
for ( size_t i = 0; i < m_num_block_size; ++i ) {
@ -1208,7 +1208,7 @@ public:
fflush( stdout );
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
// Print the summary page histogram.
printf( "USED_BLOCKS PAGE_COUNT\n" );
for ( uint32_t i = 0; i < 33; ++i ) {
@ -1217,7 +1217,7 @@ public:
printf( "\n" );
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
// Print the page histogram for a few individual superblocks.
// const uint32_t num_sb_id = 2;
// uint32_t sb_id[num_sb_id] = { 0, 10 };
@ -1484,7 +1484,7 @@ private:
// 1. An invalid superblock should never be found here.
// 2. If the new superblock is the same as the previous superblock, the
// allocator is empty.
#ifdef KOKKOS_MEMPOOL_PRINTERR
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
if ( new_sb == INVALID_SUPERBLOCK ) {
printf( "\n** MemoryPool::find_superblock() FOUND_INACTIVE_SUPERBLOCK **\n" );
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
@ -1531,28 +1531,28 @@ private:
} // namespace Experimental
} // namespace Kokkos
#ifdef KOKKOS_MEMPOOL_PRINTERR
#undef KOKKOS_MEMPOOL_PRINTERR
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
#undef KOKKOS_ENABLE_MEMPOOL_PRINTERR
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
#undef KOKKOS_MEMPOOL_PRINT_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
#undef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#undef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
#undef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
#endif
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#undef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
#endif
#endif // KOKKOS_MEMORYPOOL_HPP

View File

@ -46,14 +46,18 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP )
#if defined( KOKKOS_ENABLE_OPENMP) && !defined(_OPENMP)
#error "You enabled Kokkos OpenMP support without enabling OpenMP in the compiler!"
#endif
#if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP )
#include <omp.h>
#include <cstddef>
#include <iosfwd>
#include <Kokkos_HostSpace.hpp>
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
#include <Kokkos_HBWSpace.hpp>
#endif
#include <Kokkos_ScratchSpace.hpp>
@ -77,7 +81,7 @@ public:
//! Tag this class as a kokkos execution space
typedef OpenMP execution_space ;
#ifdef KOKKOS_HAVE_HBWSPACE
#ifdef KOKKOS_ENABLE_HBWSPACE
typedef Experimental::HBWSpace memory_space ;
#else
typedef HostSpace memory_space ;
@ -194,7 +198,7 @@ struct VerifyExecutionCanAccessMemorySpace
/*--------------------------------------------------------------------------*/
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP ) */
#endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP ) */
#endif /* #ifndef KOKKOS_OPENMP_HPP */

View File

@ -61,7 +61,7 @@
#include <impl/Kokkos_Traits.hpp>
#include <impl/Kokkos_FunctorAdapter.hpp>
#ifdef KOKKOS_HAVE_DEBUG
#ifdef KOKKOS_DEBUG
#include<iostream>
#endif

View File

@ -978,7 +978,7 @@ struct ParallelReduceReturnValue<typename std::enable_if<Kokkos::is_view<ReturnT
typedef InvalidType reducer_type;
typedef typename return_type::value_type value_type_scalar;
typedef typename return_type::value_type value_type_array[];
typedef typename return_type::value_type* const value_type_array;
typedef typename if_c<return_type::rank==0,value_type_scalar,value_type_array>::type value_type;

View File

@ -106,14 +106,14 @@ public:
void* tmp = m_iter_L0 + m_offset * align (size);
if (m_end_L0 < (m_iter_L0 += align (size) * m_multiplier)) {
m_iter_L0 -= align (size) * m_multiplier; // put it back like it was
#ifdef KOKKOS_HAVE_DEBUG
#ifdef KOKKOS_DEBUG
// mfh 23 Jun 2015: printf call consumes 25 registers
// in a CUDA build, so only print in debug mode. The
// function still returns NULL if not enough memory.
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
long(m_end_L0-m_iter_L0));
#endif // KOKKOS_HAVE_DEBUG
#endif // KOKKOS_DEBUG
tmp = 0;
}
return tmp;
@ -121,14 +121,14 @@ public:
void* tmp = m_iter_L1 + m_offset * align (size);
if (m_end_L1 < (m_iter_L1 += align (size) * m_multiplier)) {
m_iter_L1 -= align (size) * m_multiplier; // put it back like it was
#ifdef KOKKOS_HAVE_DEBUG
#ifdef KOKKOS_DEBUG
// mfh 23 Jun 2015: printf call consumes 25 registers
// in a CUDA build, so only print in debug mode. The
// function still returns NULL if not enough memory.
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
long(m_end_L1-m_iter_L1));
#endif // KOKKOS_HAVE_DEBUG
#endif // KOKKOS_DEBUG
tmp = 0;
}
return tmp;

View File

@ -61,7 +61,7 @@
#include <KokkosExp_MDRangePolicy.hpp>
#if defined( KOKKOS_HAVE_SERIAL )
#if defined( KOKKOS_ENABLE_SERIAL )
namespace Kokkos {
@ -1005,7 +1005,7 @@ template<typename iType, class Lambda>
KOKKOS_INLINE_FUNCTION
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
loop_boundaries, const Lambda& lambda) {
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
@ -1021,7 +1021,7 @@ KOKKOS_INLINE_FUNCTION
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
loop_boundaries, const Lambda & lambda, ValueType& result) {
result = ValueType();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1044,7 +1044,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::S
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
ValueType result = init_result;
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1075,7 +1075,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Ser
value_type scan_val = value_type();
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
@ -1116,7 +1116,7 @@ void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const Func
#include <impl/Kokkos_Serial_Task.hpp>
#endif // defined( KOKKOS_HAVE_SERIAL )
#endif // defined( KOKKOS_ENABLE_SERIAL )
#endif /* #define KOKKOS_SERIAL_HPP */
//----------------------------------------------------------------------------

View File

@ -52,9 +52,9 @@
// and use relocatable device code to enable the task policy.
// nvcc relocatable device code option: --relocatable-device-code=true
#if ( defined( KOKKOS_HAVE_CUDA ) )
#if ( defined( KOKKOS_ENABLE_CUDA ) )
#if ( 8000 <= CUDA_VERSION ) && \
defined( KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE )
defined( KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE )
#define KOKKOS_ENABLE_TASKDAG
@ -63,7 +63,6 @@
#define KOKKOS_ENABLE_TASKDAG
#endif
#if defined( KOKKOS_ENABLE_TASKDAG )
//----------------------------------------------------------------------------
@ -90,6 +89,34 @@ class TaskScheduler ;
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief Forward declarations of the task implementation types.
 *
 * TaskBase holds the implementation data for task data management,
 * access, and execution.  Its inheritance structure (CRTP) allows a
 * static_cast from the task root type and a task's FunctorType:
 *
 *   TaskBase< Space , ResultType , FunctorType >
 *     : TaskBase< Space , ResultType , void >
 *     , FunctorType
 *   { ... };
 *
 *   TaskBase< Space , ResultType , void >
 *     : TaskBase< Space , void , void >
 *   { ... };
 */
template< class Space , class ResultType , class FunctorType > class TaskBase ;

/** \brief Forward declaration of the per-space task execution type. */
template< class Space > class TaskExec ;
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
/**
@ -302,14 +329,6 @@ enum TaskPriority { TaskHighPriority = 0
template< typename Space >
void wait( TaskScheduler< Space > const & );
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
} // namespace Kokkos
//----------------------------------------------------------------------------
@ -363,20 +382,7 @@ private:
, Future< A1 , A2 > const & arg
, Options const & ... opts )
{
// Assign dependence to task->m_next
// which will be processed within subsequent call to schedule.
// Error if the dependence is reset.
if ( 0 != Kokkos::atomic_exchange(& task->m_next, arg.m_task) ) {
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
}
if ( 0 != arg.m_task ) {
// The future may be destroyed upon returning from this call
// so increment reference count to track this assignment.
Kokkos::atomic_increment( &(arg.m_task->m_ref_count) );
}
task->add_dependence( arg.m_task );
assign( task , opts ... );
}
@ -558,8 +564,7 @@ public:
// Potentially spawning outside execution space so the
// apply function pointer must be obtained from execution space.
// Required for Cuda execution space function pointer.
queue_type::specialization::template
proc_set_apply< FunctorType >( & f.m_task->m_apply );
m_queue->template proc_set_apply< FunctorType >( & f.m_task->m_apply );
m_queue->schedule( f.m_task );
}
@ -612,7 +617,7 @@ public:
for ( int i = 0 ; i < narg ; ++i ) {
task_base * const t = dep[i] = arg[i].m_task ;
if ( 0 != t ) {
Kokkos::atomic_increment( &(t->m_ref_count) );
Kokkos::atomic_increment( &(t->m_ref_count) );
}
}
@ -638,25 +643,13 @@ public:
, value_type
, FunctorType > ;
task_base * const zero = (task_base *) 0 ;
task_base * const lock = (task_base *) task_base::LockTag ;
task_type * const task = static_cast< task_type * >( task_self );
// Precondition:
// task is in Executing state
// therefore m_next == LockTag
//
// Change to m_next == 0 for no dependence
if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
Kokkos::abort("TaskScheduler::respawn ERROR: already respawned");
}
// Reschedule task with no dependences.
m_queue->reschedule( task );
// Dependences, if requested, are added here through parsing the arguments.
assign( task , arg_options... );
// Postcondition:
// task is in Executing-Respawn state
// therefore m_next == dependence or 0
}
//----------------------------------------
@ -697,4 +690,3 @@ void wait( TaskScheduler< ExecSpace > const & policy )
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #ifndef KOKKOS_TASKSCHEDULER_HPP */

View File

@ -46,7 +46,7 @@
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_HAVE_PTHREAD )
#if defined( KOKKOS_ENABLE_PTHREAD )
#include <cstddef>
#include <iosfwd>
@ -227,7 +227,7 @@ struct VerifyExecutionCanAccessMemorySpace
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */
#endif /* #define KOKKOS_THREADS_HPP */

View File

@ -47,10 +47,10 @@
#include <stddef.h>
#ifdef _MSC_VER
#undef KOKKOS_USE_LIBRT
#undef KOKKOS_ENABLE_LIBRT
#include <gettimeofday.c>
#else
#ifdef KOKKOS_USE_LIBRT
#ifdef KOKKOS_ENABLE_LIBRT
#include <ctime>
#else
#include <sys/time.h>
@ -63,7 +63,7 @@ namespace Kokkos {
class Timer {
private:
#ifdef KOKKOS_USE_LIBRT
#ifdef KOKKOS_ENABLE_LIBRT
struct timespec m_old;
#else
struct timeval m_old ;
@ -74,7 +74,7 @@ public:
inline
void reset() {
#ifdef KOKKOS_USE_LIBRT
#ifdef KOKKOS_ENABLE_LIBRT
clock_gettime(CLOCK_REALTIME, &m_old);
#else
gettimeofday( & m_old , ((struct timezone *) NULL ) );
@ -90,7 +90,7 @@ public:
inline
double seconds() const
{
#ifdef KOKKOS_USE_LIBRT
#ifdef KOKKOS_ENABLE_LIBRT
struct timespec m_new;
clock_gettime(CLOCK_REALTIME, &m_new);

View File

@ -46,7 +46,7 @@
#ifndef KOKKOS_VECTORIZATION_HPP
#define KOKKOS_VECTORIZATION_HPP
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
#include <Cuda/Kokkos_Cuda_Vectorization.hpp>
#endif

View File

@ -623,13 +623,13 @@ private:
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
Kokkos::Impl::view_verify_operator_bounds ARG ;
#else
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
#endif
@ -647,9 +647,9 @@ public:
operator()( Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
#endif
return m_map.reference();
@ -670,9 +670,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
#endif
return m_map.reference(i0);
@ -692,9 +692,9 @@ public:
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
#endif
return m_map.m_handle[ i0 ];
@ -713,9 +713,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
#endif
return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
@ -734,9 +734,9 @@ public:
operator[]( const I0 & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.reference(i0);
@ -753,9 +753,9 @@ public:
operator[]( const I0 & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.m_handle[ i0 ];
@ -772,9 +772,9 @@ public:
operator[]( const I0 & i0 ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
#endif
return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
@ -795,9 +795,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.reference(i0,i1);
@ -816,9 +816,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ];
@ -837,9 +837,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ];
@ -858,9 +858,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ];
@ -879,9 +879,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ];
@ -900,9 +900,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
#endif
return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 +
@ -924,9 +924,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ];
@ -944,9 +944,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
#endif
return m_map.reference(i0,i1,i2);
@ -967,9 +967,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ];
@ -987,9 +987,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
#endif
return m_map.reference(i0,i1,i2,i3);
@ -1012,9 +1012,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ];
@ -1034,9 +1034,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
#endif
return m_map.reference(i0,i1,i2,i3,i4);
@ -1059,9 +1059,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ];
@ -1081,9 +1081,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5);
@ -1106,9 +1106,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ];
@ -1128,9 +1128,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
@ -1153,9 +1153,9 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
#endif
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
@ -1175,15 +1175,15 @@ public:
, Args ... args ) const
{
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
#else
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
#endif
return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7);
}
#undef KOKKOS_VIEW_OPERATOR_VERIFY
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
//----------------------------------------
// Standard destructor, constructors, and assignment operators
@ -1322,7 +1322,7 @@ public:
alloc_prop prop( arg_prop );
//------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
// If allocating in CudaUVMSpace must fence before and after
// the allocation to protect against possible concurrent access
// on the CPU and the GPU.
@ -1338,7 +1338,7 @@ public:
record = m_map.allocate_shared( prop , arg_layout );
//------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if defined( KOKKOS_ENABLE_CUDA )
if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
traits::device_type::memory_space::execution_space::fence();
}

View File

@ -79,7 +79,7 @@ private:
, const Member ibeg , const Member iend )
{
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -96,7 +96,7 @@ private:
{
const TagType t{} ;
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -218,7 +218,7 @@ private:
, reference_type update )
{
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -236,7 +236,7 @@ private:
{
const TagType t{} ;
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -417,7 +417,7 @@ private:
, reference_type update , const bool final )
{
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif
@ -435,7 +435,7 @@ private:
{
const TagType t{} ;
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
#pragma ivdep
#endif
#endif

View File

@ -43,7 +43,7 @@
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
#if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
#include <impl/Kokkos_TaskQueue_impl.hpp>
@ -324,6 +324,6 @@ void TaskQueueSpecialization< Kokkos::OpenMP >::
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */
#endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */

Some files were not shown because too many files have changed in this diff Show More