Compare commits
79 Commits
patch_26Ja
...
patch_21Fe
| Author | SHA1 | Date | |
|---|---|---|---|
| 49e83b4348 | |||
| 6e89ccd522 | |||
| 53f3df5bfc | |||
| 3dbbea342a | |||
| b70c670aac | |||
| 1d17cae407 | |||
| 429264a12b | |||
| d001a09345 | |||
| cb9d42da08 | |||
| 7185ec92b3 | |||
| 1cd4c48ccc | |||
| a88136c3f5 | |||
| ce20c7ffe9 | |||
| 4a80df3a99 | |||
| 5f93fad012 | |||
| ccaec315db | |||
| c6c1852b3b | |||
| 69a8e19dc5 | |||
| 928947dcea | |||
| 48070011d9 | |||
| 0fb8dacc00 | |||
| 6b923476b9 | |||
| 20806dd86a | |||
| 90e5ae965d | |||
| 15008c9d18 | |||
| 33af7ab248 | |||
| 8f9b2aca06 | |||
| 383da816c2 | |||
| cb982f2f28 | |||
| 4843296d4e | |||
| 2bdda8f6c0 | |||
| 0068ef5616 | |||
| 02b0e6cc55 | |||
| fbb24c2406 | |||
| a5f830c40c | |||
| 8c074a363a | |||
| 27aca14094 | |||
| 191453e1c7 | |||
| 207adc3968 | |||
| 84c517159d | |||
| 6ca377436f | |||
| dc34a32602 | |||
| 067119f6c6 | |||
| 1834a5e46c | |||
| 6a4918b39a | |||
| 5da0d39392 | |||
| 6f92429602 | |||
| 38e0e4bb69 | |||
| daf9f95381 | |||
| 6595fde0a1 | |||
| 6bcec9c61d | |||
| 9d1991bf84 | |||
| 0a87b7443a | |||
| 7ee45ec5f3 | |||
| d4c9e2500b | |||
| 6232073d3b | |||
| ed59193d13 | |||
| 67bed8e853 | |||
| bcb1d94b9a | |||
| fbe30b5683 | |||
| 9ef55fedf7 | |||
| 997142a4c1 | |||
| 033b07fdb7 | |||
| 51a0b6b445 | |||
| 59f4a77dd5 | |||
| 579cc6d7aa | |||
| 5afd3e995b | |||
| 2a6f5e651c | |||
| 09fc8b0bd7 | |||
| e5d0bde783 | |||
| 9daf7fb650 | |||
| b5d622c6a3 | |||
| 2023fa28e0 | |||
| 5b29515849 | |||
| 5b18421dd2 | |||
| cf95ea0709 | |||
| 6a74a81da0 | |||
| f0a4ed615d | |||
| cfe818a175 |
@ -1,7 +1,7 @@
|
||||
<!-- HTML_ONLY -->
|
||||
<HEAD>
|
||||
<TITLE>LAMMPS Users Manual</TITLE>
|
||||
<META NAME="docnumber" CONTENT="26 Jan 2017 version">
|
||||
<META NAME="docnumber" CONTENT="21 Feb 2017 version">
|
||||
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
|
||||
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
|
||||
</HEAD>
|
||||
@ -21,7 +21,7 @@
|
||||
<H1></H1>
|
||||
|
||||
LAMMPS Documentation :c,h3
|
||||
26 Jan 2017 version :c,h4
|
||||
21 Feb 2017 version :c,h4
|
||||
|
||||
Version info: :h4
|
||||
|
||||
|
||||
@ -969,7 +969,7 @@ KOKKOS, o = USER-OMP, t = OPT.
|
||||
"lubricateU/poly"_pair_lubricateU.html,
|
||||
"meam"_pair_meam.html,
|
||||
"mie/cut (o)"_pair_mie.html,
|
||||
"morse (got)"_pair_morse.html,
|
||||
"morse (gkot)"_pair_morse.html,
|
||||
"nb3b/harmonic (o)"_pair_nb3b_harmonic.html,
|
||||
"nm/cut (o)"_pair_nm.html,
|
||||
"nm/cut/coul/cut (o)"_pair_nm.html,
|
||||
@ -1076,7 +1076,7 @@ KOKKOS, o = USER-OMP, t = OPT.
|
||||
"none"_bond_none.html,
|
||||
"zero"_bond_zero.html,
|
||||
"hybrid"_bond_hybrid.html,
|
||||
"class2 (o)"_bond_class2.html,
|
||||
"class2 (ko)"_bond_class2.html,
|
||||
"fene (iko)"_bond_fene.html,
|
||||
"fene/expand (o)"_bond_fene_expand.html,
|
||||
"harmonic (ko)"_bond_harmonic.html,
|
||||
@ -1109,7 +1109,7 @@ USER-OMP, t = OPT.
|
||||
"zero"_angle_zero.html,
|
||||
"hybrid"_angle_hybrid.html,
|
||||
"charmm (ko)"_angle_charmm.html,
|
||||
"class2 (o)"_angle_class2.html,
|
||||
"class2 (ko)"_angle_class2.html,
|
||||
"cosine (o)"_angle_cosine.html,
|
||||
"cosine/delta (o)"_angle_cosine_delta.html,
|
||||
"cosine/periodic (o)"_angle_cosine_periodic.html,
|
||||
@ -1145,7 +1145,7 @@ USER-OMP, t = OPT.
|
||||
"zero"_dihedral_zero.html,
|
||||
"hybrid"_dihedral_hybrid.html,
|
||||
"charmm (ko)"_dihedral_charmm.html,
|
||||
"class2 (o)"_dihedral_class2.html,
|
||||
"class2 (ko)"_dihedral_class2.html,
|
||||
"harmonic (io)"_dihedral_harmonic.html,
|
||||
"helix (o)"_dihedral_helix.html,
|
||||
"multi/harmonic (o)"_dihedral_multi_harmonic.html,
|
||||
@ -1177,7 +1177,7 @@ USER-OMP, t = OPT.
|
||||
"none"_improper_none.html,
|
||||
"zero"_improper_zero.html,
|
||||
"hybrid"_improper_hybrid.html,
|
||||
"class2 (o)"_improper_class2.html,
|
||||
"class2 (ko)"_improper_class2.html,
|
||||
"cvff (io)"_improper_cvff.html,
|
||||
"harmonic (ko)"_improper_harmonic.html,
|
||||
"umbrella (o)"_improper_umbrella.html :tb(c=4,ea=c)
|
||||
|
||||
@ -22,7 +22,7 @@ either conceptually, or as printed out by the program.
|
||||
|
||||
12.1 Common problems :link(err_1),h4
|
||||
|
||||
If two LAMMPS runs do not produce the same answer on different
|
||||
If two LAMMPS runs do not produce the exact same answer on different
|
||||
machines or different numbers of processors, this is typically not a
|
||||
bug. In theory you should get identical answers on any number of
|
||||
processors and on any machine. In practice, numerical round-off can
|
||||
@ -80,12 +80,24 @@ order. If you mess this up, LAMMPS will often flag the error, but it
|
||||
may also simply read a bogus argument and assign a value that is
|
||||
valid, but not what you wanted. E.g. trying to read the string "abc"
|
||||
as an integer value of 0. Careful reading of the associated doc page
|
||||
for the command should allow you to fix these problems. Note that
|
||||
some commands allow for variables to be specified in place of numeric
|
||||
constants so that the value can be evaluated and change over the
|
||||
course of a run. This is typically done with the syntax {v_name} for
|
||||
a parameter, where name is the name of the variable. This is only
|
||||
allowed if the command documentation says it is.
|
||||
for the command should allow you to fix these problems. In most cases,
|
||||
where LAMMPS expects to read a number, either integer or floating point,
|
||||
it performs a stringent test on whether the provided input actually
|
||||
is an integer or floating-point number, respectively, and rejects the
|
||||
input with an error message (for instance, when an integer is required,
|
||||
but a floating-point number 1.0 is provided):
|
||||
|
||||
ERROR: Expected integer parameter in input script or data file :pre
|
||||
|
||||
Some commands allow for using variable references in place of numeric
|
||||
constants so that the value can be evaluated and may change over the
|
||||
course of a run. This is typically done with the syntax {v_name} for a
|
||||
parameter, where name is the name of the variable. On the other hand,
|
||||
immediate variable expansion with the syntax ${name} is performed while
|
||||
reading the input and before parsing commands.
|
||||
|
||||
NOTE: Using a variable reference (i.e. {v_name}) is only allowed if
|
||||
the documentation of the corresponding command explicitly says it is.
|
||||
|
||||
Generally, LAMMPS will print a message to the screen and logfile and
|
||||
exit gracefully when it encounters a fatal error. Sometimes it will
|
||||
|
||||
@ -2573,7 +2573,7 @@ well.
|
||||
6.26 Adiabatic core/shell model :link(howto_26),h4
|
||||
|
||||
The adiabatic core-shell model by "Mitchell and
|
||||
Finchham"_#MitchellFinchham is a simple method for adding
|
||||
Fincham"_#MitchellFincham is a simple method for adding
|
||||
polarizability to a system. In order to mimic the electron shell of
|
||||
an ion, a satellite particle is attached to it. This way the ions are
|
||||
split into a core and a shell where the latter is meant to react to
|
||||
@ -2667,13 +2667,16 @@ bond_coeff 1 63.014 0.0
|
||||
bond_coeff 2 25.724 0.0 :pre
|
||||
|
||||
When running dynamics with the adiabatic core/shell model, the
|
||||
following issues should be considered. Since the relative motion of
|
||||
the core and shell particles corresponds to the polarization, typical
|
||||
thermostats can alter the polarization behaviour, meaning the shell
|
||||
will not react freely to its electrostatic environment. This is
|
||||
critical during the equilibration of the system. Therefore
|
||||
it's typically desirable to decouple the relative motion of the
|
||||
core/shell pair, which is an imaginary degree of freedom, from the
|
||||
following issues should be considered. The relative motion of
|
||||
the core and shell particles corresponds to the polarization,
|
||||
hereby an instantaneous relaxation of the shells is approximated
|
||||
and a fast core/shell spring frequency ensures a nearly constant
|
||||
internal kinetic energy during the simulation.
|
||||
Thermostats can alter this polarization behaviour, by scaling the
|
||||
internal kinetic energy, meaning the shell will not react freely to
|
||||
its electrostatic environment.
|
||||
Therefore it is typically desirable to decouple the relative motion of
|
||||
the core/shell pair, which is an imaginary degree of freedom, from the
|
||||
real physical system. To do that, the "compute
|
||||
temp/cs"_compute_temp_cs.html command can be used, in conjunction with
|
||||
any of the thermostat fixes, such as "fix nvt"_fix_nh.html or "fix
|
||||
@ -2704,6 +2707,22 @@ fix thermostatequ all nve # integrator as needed f
|
||||
fix_modify thermoberendsen temp CSequ
|
||||
thermo_modify temp CSequ # output of center-of-mass derived temperature :pre
|
||||
|
||||
The pressure for the core/shell system is computed via the regular
|
||||
LAMMPS convention by "treating the cores and shells as individual
|
||||
particles"_#MitchellFincham2. For the thermo output of the pressure
|
||||
as well as for the application of a barostat, it is necessary to
|
||||
use an additional "pressure"_compute_pressure.html compute based on the
|
||||
default "temperature"_compute_temp.html and specifying it as a second
|
||||
argument in "fix_modify"_fix_modify.html and
|
||||
"thermo_modify"_thermo_modify.html resulting in:
|
||||
|
||||
(...)
|
||||
compute CSequ all temp/cs cores shells
|
||||
compute thermo_press_lmp all pressure thermo_temp # pressure for individual particles
|
||||
thermo_modify temp CSequ press thermo_press_lmp # modify thermo to regular pressure
|
||||
fix press_bar all npt temp 300 300 0.04 iso 0 0 0.4
|
||||
fix_modify press_bar temp CSequ press thermo_press_lmp # pressure modification for correct kinetic scalar :pre
|
||||
|
||||
If "compute temp/cs"_compute_temp_cs.html is used, the decoupled
|
||||
relative motion of the core and the shell should in theory be
|
||||
stable. However numerical fluctuation can introduce a small
|
||||
@ -2724,24 +2743,18 @@ temp/cs"_compute_temp_cs.html command to the {temp} keyword of the
|
||||
velocity all create 1427 134 bias yes temp CSequ
|
||||
velocity all scale 1427 temp CSequ :pre
|
||||
|
||||
It is important to note that the polarizability of the core/shell
|
||||
pairs is based on their relative motion. Therefore the choice of
|
||||
spring force and mass ratio need to ensure much faster relative motion
|
||||
of the 2 atoms within the core/shell pair than their center-of-mass
|
||||
velocity. This allow the shells to effectively react instantaneously
|
||||
to the electrostatic environment. This fast movement also limits the
|
||||
timestep size that can be used.
|
||||
To maintain the correct polarizability of the core/shell pairs, the
|
||||
kinetic energy of the internal motion shall remain nearly constant.
|
||||
Therefore the choice of spring force and mass ratio needs to ensure
|
||||
much faster relative motion of the 2 atoms within the core/shell pair
|
||||
than their center-of-mass velocity. This allows the shells to
|
||||
effectively react instantaneously to the electrostatic environment and
|
||||
limits energy transfer to or from the core/shell oscillators.
|
||||
This fast movement also dictates the timestep that can be used.
|
||||
|
||||
The primary literature of the adiabatic core/shell model suggests that
|
||||
the fast relative motion of the core/shell pairs only allows negligible
|
||||
energy transfer to the environment. Therefore it is not intended to
|
||||
decouple the core/shell degree of freedom from the physical system
|
||||
during production runs. In other words, the "compute
|
||||
temp/cs"_compute_temp_cs.html command should not be used during
|
||||
production runs and is only required during equilibration. This way one
|
||||
is consistent with literature (based on the code packages DL_POLY or
|
||||
GULP for instance).
|
||||
|
||||
energy transfer to the environment.
|
||||
The mentioned energy transfer will typically lead to a small drift
|
||||
in total energy over time. This internal energy can be monitored
|
||||
using the "compute chunk/atom"_compute_chunk_atom.html and "compute
|
||||
@ -2761,14 +2774,20 @@ command, to use as input to the "compute
|
||||
chunk/atom"_compute_chunk_atom.html command to define the core/shell
|
||||
pairs as chunks.
|
||||
|
||||
For example,
|
||||
For example if core/shell pairs are the only molecules:
|
||||
|
||||
read_data NaCl_CS_x0.1_prop.data
|
||||
compute prop all property/atom molecule
|
||||
compute cs_chunk all chunk/atom c_prop
|
||||
compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0 # note the chosen degrees of freedom for the core/shell pairs
|
||||
fix ave_chunk all ave/time 10 1 10 c_cstherm file chunk.dump mode vector :pre
|
||||
|
||||
For example if core/shell pairs and other molecules are present:
|
||||
|
||||
fix csinfo all property/atom i_CSID # property/atom command
|
||||
read_data NaCl_CS_x0.1_prop.data fix csinfo NULL CS-Info # atom property added in the data-file
|
||||
compute prop all property/atom i_CSID
|
||||
compute cs_chunk all chunk/atom c_prop
|
||||
compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0 # note the chosen degrees of freedom for the core/shell pairs
|
||||
fix ave_chunk all ave/time 10 1 10 c_cstherm file chunk.dump mode vector :pre
|
||||
(...) :pre
|
||||
|
||||
The additional section in the data file would be formatted like this:
|
||||
|
||||
@ -2890,9 +2909,13 @@ Phys, 79, 926 (1983).
|
||||
:link(Shinoda)
|
||||
[(Shinoda)] Shinoda, Shiga, and Mikami, Phys Rev B, 69, 134103 (2004).
|
||||
|
||||
:link(MitchellFinchham)
|
||||
[(Mitchell and Finchham)] Mitchell, Finchham, J Phys Condensed Matter,
|
||||
:link(MitchellFincham)
|
||||
[(Mitchell and Fincham)] Mitchell, Fincham, J Phys Condensed Matter,
|
||||
5, 1031-1038 (1993).
|
||||
|
||||
:link(MitchellFincham2)
|
||||
[(Fincham)] Fincham, Mackrodt and Mitchell, J Phys Condensed Matter,
|
||||
6, 393-404 (1994).
|
||||
|
||||
:link(howto-Lamoureux)
|
||||
[(Lamoureux and Roux)] G. Lamoureux, B. Roux, J. Chem. Phys 119, 3025 (2003)
|
||||
|
||||
@ -413,7 +413,7 @@ uses (for performing 1d FFTs) when running the particle-particle
|
||||
particle-mesh (PPPM) option for long-range Coulombics via the
|
||||
"kspace_style"_kspace_style.html command.
|
||||
|
||||
LAMMPS supports various open-source or vendor-supplied FFT libraries
|
||||
LAMMPS supports common open-source or vendor-supplied FFT libraries
|
||||
for this purpose. If you leave these 3 variables blank, LAMMPS will
|
||||
use the open-source "KISS FFT library"_http://kissfft.sf.net, which is
|
||||
included in the LAMMPS distribution. This library is portable to all
|
||||
@ -423,10 +423,9 @@ package in your build, you can also leave the 3 variables blank.
|
||||
|
||||
Otherwise, select which kinds of FFTs to use as part of the FFT_INC
|
||||
setting by a switch of the form -DFFT_XXX. Recommended values for XXX
|
||||
are: MKL, SCSL, FFTW2, and FFTW3. Legacy options are: INTEL, SGI,
|
||||
ACML, and T3E. For backward compatability, using -DFFT_FFTW will use
|
||||
the FFTW2 library. Using -DFFT_NONE will use the KISS library
|
||||
described above.
|
||||
are: MKL or FFTW3. FFTW2 and NONE are supported as legacy options.
|
||||
Selecting -DFFT_FFTW will use the FFTW3 library and -DFFT_NONE will
|
||||
use the KISS library described above.
|
||||
|
||||
You may also need to set the FFT_INC, FFT_PATH, and FFT_LIB variables,
|
||||
so the compiler and linker can find the needed FFT header and library
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
|
||||
angle_style class2 command :h3
|
||||
angle_style class2/omp command :h3
|
||||
angle_style class2/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
|
||||
bond_style class2 command :h3
|
||||
bond_style class2/omp command :h3
|
||||
bond_style class2/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
||||
@ -16,10 +16,11 @@ ID, group-ID are documented in "compute"_compute.html command :ulb,l
|
||||
group/group = style name of this compute command :l
|
||||
group2-ID = group ID of second (or same) group :l
|
||||
zero or more keyword/value pairs may be appended :l
|
||||
keyword = {pair} or {kspace} or {boundary} :l
|
||||
keyword = {pair} or {kspace} or {boundary} or {molecule} :l
|
||||
{pair} value = {yes} or {no}
|
||||
{kspace} value = {yes} or {no}
|
||||
{boundary} value = {yes} or {no} :pre
|
||||
{boundary} value = {yes} or {no}
|
||||
{molecule} value = {off} or {inter} or {intra} :pre
|
||||
:ule
|
||||
|
||||
[Examples:]
|
||||
@ -46,6 +47,13 @@ NOTE: The energies computed by the {pair} keyword do not include tail
|
||||
corrections, even if they are enabled via the
|
||||
"pair_modify"_pair_modify.html command.
|
||||
|
||||
If the {molecule} keyword is set to {inter} or {intra} then an
|
||||
additional check is made based on the molecule IDs of the two atoms in
|
||||
each pair before including their pairwise interaction energy and
|
||||
force. For the {inter} setting, the two atoms must be in different
|
||||
molecules. For the {intra} setting, the two atoms must be in the same
|
||||
molecule.
|
||||
|
||||
If the {kspace} keyword is set to {yes}, which is not the default, and
|
||||
if a "kspace_style"_kspace_style.html is defined, then the interaction
|
||||
energy will include a Kspace component which is the long-range
|
||||
@ -66,6 +74,10 @@ affect the force calculation and will be zero if one or both of the
|
||||
groups are charge neutral. This energy correction term is the same as
|
||||
that included in the regular Ewald and PPPM routines.
|
||||
|
||||
NOTE: The {molecule} setting only affects the group/group
|
||||
contributions calculated by the {pair} keyword. It does not affect
|
||||
the group/group contributions calculated by the {kspace} keyword.
|
||||
|
||||
This compute does not calculate any bond or angle or dihedral or
|
||||
improper interactions between atoms in the two groups.
|
||||
|
||||
@ -78,6 +90,22 @@ work (FFTs, Ewald summation) as computing long-range forces for the
|
||||
entire system. Thus it can be costly to invoke this compute too
|
||||
frequently.
|
||||
|
||||
NOTE: If you have a bonded system, then the settings of
|
||||
"special_bonds"_special_bonds.html command can remove pairwise
|
||||
interactions between atoms in the same bond, angle, or dihedral. This
|
||||
is the default setting for the "special_bonds"_special_bonds.html
|
||||
command, and means those pairwise interactions do not appear in the
|
||||
neighbor list. Because this compute uses a neighbor list, it also
|
||||
means those pairs will not be included in the group/group interaction.
|
||||
This does not apply when using long-range coulomb interactions
|
||||
({coul/long}, {coul/msm}, {coul/wolf} or similar). One way to get
|
||||
around this would be to set special_bond scaling factors to very tiny
|
||||
numbers that are not exactly zero (e.g. 1.0e-50). Another workaround
|
||||
is to write a dump file, and use the "rerun"_rerun.html command to
|
||||
compute the group/group interactions for snapshots in the dump file.
|
||||
The rerun script can use a "special_bonds"_special_bonds.html command
|
||||
that includes all pairs in the neighbor list.
|
||||
|
||||
If you desire a breakdown of the interactions into a pairwise and
|
||||
Kspace component, simply invoke the compute twice with the appropriate
|
||||
yes/no settings for the {pair} and {kspace} keywords. This is no more
|
||||
@ -119,7 +147,8 @@ The {ewald} and {pppm} styles do.
|
||||
|
||||
[Default:]
|
||||
|
||||
The option defaults are pair = yes, kspace = no, and boundary = yes.
|
||||
The option defaults are pair = yes, kspace = no, boundary = yes,
|
||||
molecule = off.
|
||||
|
||||
:line
|
||||
|
||||
|
||||
@ -10,21 +10,27 @@ compute rdf command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
compute ID group-ID rdf Nbin itype1 jtype1 itype2 jtype2 ... :pre
|
||||
compute ID group-ID rdf Nbin itype1 jtype1 itype2 jtype2 ... keyword/value ... :pre
|
||||
|
||||
ID, group-ID are documented in "compute"_compute.html command
|
||||
rdf = style name of this compute command
|
||||
Nbin = number of RDF bins
|
||||
itypeN = central atom type for Nth RDF histogram (see asterisk form below)
|
||||
jtypeN = distribution atom type for Nth RDF histogram (see asterisk form below) :ul
|
||||
ID, group-ID are documented in "compute"_compute.html command :ulb,l
|
||||
rdf = style name of this compute command :l
|
||||
Nbin = number of RDF bins :l
|
||||
itypeN = central atom type for Nth RDF histogram (see asterisk form below) :l
|
||||
jtypeN = distribution atom type for Nth RDF histogram (see asterisk form below) :l
|
||||
|
||||
zero or more keyword/value pairs may be appended :l
|
||||
keyword = {cutoff} :l
|
||||
{cutoff} value = Rcut
|
||||
Rcut = cutoff distance for RDF computation (distance units) :pre
|
||||
:ule
|
||||
|
||||
[Examples:]
|
||||
|
||||
compute 1 all rdf 100
|
||||
compute 1 all rdf 100 1 1
|
||||
compute 1 all rdf 100 * 3
|
||||
compute 1 all rdf 100 * 3 cutoff 5.0
|
||||
compute 1 fluid rdf 500 1 1 1 2 2 1 2 2
|
||||
compute 1 fluid rdf 500 1*3 2 5 *10 :pre
|
||||
compute 1 fluid rdf 500 1*3 2 5 *10 cutoff 3.5 :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
@ -32,7 +38,8 @@ Define a computation that calculates the radial distribution function
|
||||
(RDF), also called g(r), and the coordination number for a group of
|
||||
particles. Both are calculated in histogram form by binning pairwise
|
||||
distances into {Nbin} bins from 0.0 to the maximum force cutoff
|
||||
defined by the "pair_style"_pair_style.html command. The bins are of
|
||||
defined by the "pair_style"_pair_style.html command or the cutoff
|
||||
distance {Rcut} specified via the {cutoff} keyword. The bins are of
|
||||
uniform size in radial distance. Thus a single bin encompasses a thin
|
||||
shell of distances in 3d and a thin ring of distances in 2d.
|
||||
|
||||
@ -41,17 +48,41 @@ NOTE: If you have a bonded system, then the settings of
|
||||
interactions between atoms in the same bond, angle, or dihedral. This
|
||||
is the default setting for the "special_bonds"_special_bonds.html
|
||||
command, and means those pairwise interactions do not appear in the
|
||||
neighbor list. Because this fix uses the neighbor list, it also means
|
||||
neighbor list.  Because this compute uses a neighbor list, it also means
|
||||
those pairs will not be included in the RDF. This does not apply when
|
||||
using long-range coulomb ({coul/long}, {coul/msm}, {coul/wolf} or
|
||||
similar. One way to get around this would be to set special_bond
|
||||
scaling factors to very tiny numbers that are not exactly zero
|
||||
(e.g. 1.0e-50). Another workaround is to write a dump file, and use
|
||||
the "rerun"_rerun.html command to compute the RDF for snapshots in the
|
||||
dump file. The rerun script can use a
|
||||
using long-range coulomb interactions ({coul/long}, {coul/msm},
|
||||
{coul/wolf} or similar).  One way to get around this would be to set
|
||||
special_bond scaling factors to very tiny numbers that are not exactly
|
||||
zero (e.g. 1.0e-50). Another workaround is to write a dump file, and
|
||||
use the "rerun"_rerun.html command to compute the RDF for snapshots in
|
||||
the dump file. The rerun script can use a
|
||||
"special_bonds"_special_bonds.html command that includes all pairs in
|
||||
the neighbor list.
|
||||
|
||||
By default the RDF is computed out to the maximum force cutoff defined
|
||||
by the "pair_style"_pair_style.html command. If the {cutoff} keyword
|
||||
is used, then the RDF is computed accurately out to the {Rcut} > 0.0
|
||||
distance specified.
|
||||
|
||||
NOTE: Normally, you should only use the {cutoff} keyword if no pair
|
||||
style is defined, e.g. the "rerun"_rerun.html command is being used to
|
||||
post-process a dump file of snapshots. Or if you really want the RDF
|
||||
for distances beyond the pair_style force cutoff and cannot easily
|
||||
post-process a dump file to calculate it. This is because using the
|
||||
{cutoff} keyword incurs extra computation and possibly communication,
|
||||
which may slow down your simulation. If you specify a {Rcut} <= force
|
||||
cutoff, you will force an additional neighbor list to be built at
|
||||
every timestep this command is invoked (or every reneighboring
|
||||
timestep, whichever is less frequent), which is inefficient.  LAMMPS
|
||||
will warn you if this is the case. If you specify a {Rcut} > force
|
||||
cutoff, you must ensure ghost atom information out to {Rcut} + {skin}
|
||||
is communicated, via the "comm_modify cutoff"_comm_modify.html
|
||||
command, else the RDF computation cannot be performed, and LAMMPS will
|
||||
give an error message. The {skin} value is what is specified with the
|
||||
"neighbor"_neighbor.html command. In this case, you are forcing a
|
||||
large neighbor list to be built just for the RDF computation, and
|
||||
extra communication to be performed every timestep.
|
||||
|
||||
The {itypeN} and {jtypeN} arguments are optional. These arguments
|
||||
must come in pairs. If no pairs are listed, then a single histogram
|
||||
is computed for g(r) between all atom types. If one or more pairs are
|
||||
@ -153,4 +184,6 @@ change from zero to one at the location of the spike in g(r).
|
||||
|
||||
"fix ave/time"_fix_ave_time.html
|
||||
|
||||
[Default:] none
|
||||
[Default:]
|
||||
|
||||
The keyword defaults are cutoff = 0.0 (use the pairwise force cutoff).
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
|
||||
dihedral_style class2 command :h3
|
||||
dihedral_style class2/omp command :h3
|
||||
dihedral_style class2/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
|
||||
improper_style class2 command :h3
|
||||
improper_style class2/omp command :h3
|
||||
improper_style class2/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
||||
@ -13,6 +13,7 @@ pair_style morse/opt command :h3
|
||||
pair_style morse/smooth/linear command :h3
|
||||
pair_style morse/smooth/linear/omp command :h3
|
||||
pair_style morse/soft command :h3
|
||||
pair_style morse/kk command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
|
||||
@ -54,7 +54,8 @@ reset_timestep 0
|
||||
variable pxy equal pxy
|
||||
variable pxx equal pxx-press
|
||||
|
||||
fix avstress all ave/time $s $p $d v_pxy v_pxx ave one file einstein.dat
|
||||
fix avstress all ave/time $s $p $d v_pxy v_pxx ave one &
|
||||
file profile.einstein.2d
|
||||
|
||||
# Diagonal components of SS are larger by factor 2-2/d,
|
||||
# which is 4/3 for d=3, but 1 for d=2.
|
||||
|
||||
@ -40,7 +40,8 @@ thermo 50
|
||||
thermo_style custom step etotal pe ke temp press &
|
||||
epair evdwl ecoul elong ebond fnorm fmax vol
|
||||
|
||||
compute CSequ all temp/cs cores shells
|
||||
compute CStemp all temp/cs cores shells
|
||||
compute thermo_press_lmp all pressure thermo_temp # press for correct kinetic scalar
|
||||
|
||||
# output via chunk method
|
||||
|
||||
@ -49,16 +50,18 @@ compute CSequ all temp/cs cores shells
|
||||
#compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0
|
||||
#fix ave_chunk all ave/time 100 1 100 c_cstherm file chunk.dump mode vector
|
||||
|
||||
thermo_modify temp CSequ
|
||||
thermo_modify temp CStemp press thermo_press_lmp
|
||||
|
||||
# velocity bias option
|
||||
|
||||
velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CSequ
|
||||
velocity all scale 1427 temp CSequ
|
||||
velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CStemp
|
||||
velocity all scale 1427 temp CStemp
|
||||
|
||||
# thermostating using the core/shell decoupling
|
||||
|
||||
fix thermoberendsen all temp/berendsen 1427 1427 0.4
|
||||
fix nve all nve
|
||||
fix_modify thermoberendsen temp CSequ
|
||||
fix_modify thermoberendsen temp CStemp
|
||||
|
||||
# 2 fmsec timestep
|
||||
|
||||
|
||||
86
examples/coreshell/in.coreshell.thermostats
Normal file
86
examples/coreshell/in.coreshell.thermostats
Normal file
@ -0,0 +1,86 @@
|
||||
# Testsystem for core-shell model compared to Mitchell and Fincham
|
||||
# Hendrik Heenen, June 2014
|
||||
|
||||
# ------------------------ INITIALIZATION ----------------------------
|
||||
|
||||
units metal
|
||||
dimension 3
|
||||
boundary p p p
|
||||
atom_style full
|
||||
|
||||
# ----------------------- ATOM DEFINITION ----------------------------
|
||||
|
||||
fix csinfo all property/atom i_CSID
|
||||
read_data data.coreshell fix csinfo NULL CS-Info
|
||||
|
||||
group cores type 1 2
|
||||
group shells type 3 4
|
||||
|
||||
neighbor 2.0 bin
|
||||
comm_modify vel yes
|
||||
|
||||
# ------------------------ FORCE FIELDS ------------------------------
|
||||
|
||||
kspace_style ewald 1.0e-6
|
||||
pair_style born/coul/long/cs 20.0 20.0 # A, rho, sigma=0, C, D
|
||||
pair_coeff * * 0.0 1.000 0.00 0.00 0.00
|
||||
pair_coeff 3 3 487.0 0.23768 0.00 1.05 0.50 #Na-Na
|
||||
pair_coeff 3 4 145134.0 0.23768 0.00 6.99 8.70 #Na-Cl
|
||||
pair_coeff 4 4 405774.0 0.23768 0.00 72.40 145.40 #Cl-Cl
|
||||
|
||||
bond_style harmonic
|
||||
bond_coeff 1 63.014 0.0
|
||||
bond_coeff 2 25.724 0.0
|
||||
|
||||
# ------------------------ Equilibration Run -------------------------------
|
||||
|
||||
reset_timestep 0
|
||||
|
||||
thermo 50
|
||||
thermo_style custom step etotal pe ke temp press &
|
||||
epair evdwl ecoul elong ebond fnorm fmax vol
|
||||
|
||||
compute CStemp all temp/cs cores shells
|
||||
compute thermo_press_lmp all pressure thermo_temp # press for correct kinetic scalar
|
||||
|
||||
# output via chunk method
|
||||
|
||||
#compute prop all property/atom i_CSID
|
||||
#compute cs_chunk all chunk/atom c_prop
|
||||
#compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0
|
||||
#fix ave_chunk all ave/time 100 1 100 c_cstherm file chunk.dump mode vector
|
||||
|
||||
thermo_modify temp CStemp press thermo_press_lmp
|
||||
|
||||
# 2 fmsec timestep
|
||||
|
||||
timestep 0.002
|
||||
|
||||
# velocity bias option
|
||||
|
||||
velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CStemp
|
||||
velocity all scale 1427 temp CStemp
|
||||
|
||||
# thermostating using the core/shell decoupling
|
||||
|
||||
fix thermoberendsen all temp/berendsen 1427 1427 0.4
|
||||
fix nve all nve
|
||||
fix_modify thermoberendsen temp CStemp
|
||||
|
||||
run 500
|
||||
|
||||
unfix thermoberendsen
|
||||
unfix nve
|
||||
|
||||
fix npt_equ all npt temp 1427 1427 0.04 iso 0 0 0.4
|
||||
fix_modify npt_equ temp CStemp press thermo_press_lmp # pressure for correct kinetic scalar
|
||||
|
||||
run 500
|
||||
|
||||
unfix npt_equ
|
||||
|
||||
# ------------------------ Dynamic Run -------------------------------
|
||||
|
||||
fix npt_dyn all npt temp 1427 1427 0.04 iso 0 0 0.4
|
||||
fix_modify npt_dyn temp CStemp press thermo_press_lmp # pressure for correct kinetic scalar
|
||||
run 1000
|
||||
189
examples/coreshell/log.9Nov16.coreshell.dsf.g++.4
Normal file
189
examples/coreshell/log.9Nov16.coreshell.dsf.g++.4
Normal file
@ -0,0 +1,189 @@
|
||||
LAMMPS (26 Jan 2017)
|
||||
# Testsystem for core-shell model compared to Mitchel and Finchham
|
||||
# Hendrik Heenen, June 2014
|
||||
|
||||
# ------------------------ INITIALIZATION ----------------------------
|
||||
|
||||
units metal
|
||||
dimension 3
|
||||
boundary p p p
|
||||
atom_style full
|
||||
|
||||
# ----------------------- ATOM DEFINITION ----------------------------
|
||||
|
||||
fix csinfo all property/atom i_CSID
|
||||
read_data data.coreshell fix csinfo NULL CS-Info
|
||||
orthogonal box = (0 0 0) to (24.096 24.096 24.096)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
reading atoms ...
|
||||
432 atoms
|
||||
scanning bonds ...
|
||||
1 = max bonds/atom
|
||||
reading bonds ...
|
||||
216 bonds
|
||||
1 = max # of 1-2 neighbors
|
||||
0 = max # of 1-3 neighbors
|
||||
0 = max # of 1-4 neighbors
|
||||
1 = max # of special neighbors
|
||||
|
||||
group cores type 1 2
|
||||
216 atoms in group cores
|
||||
group shells type 3 4
|
||||
216 atoms in group shells
|
||||
|
||||
neighbor 2.0 bin
|
||||
comm_modify vel yes
|
||||
|
||||
# ------------------------ FORCE FIELDS ------------------------------
|
||||
|
||||
pair_style born/coul/dsf/cs 0.1 20.0 20.0 # A, rho, sigma=0, C, D
|
||||
pair_coeff * * 0.0 1.000 0.00 0.00 0.00
|
||||
pair_coeff 3 3 487.0 0.23768 0.00 1.05 0.50 #Na-Na
|
||||
pair_coeff 3 4 145134.0 0.23768 0.00 6.99 8.70 #Na-Cl
|
||||
pair_coeff 4 4 405774.0 0.23768 0.00 72.40 145.40 #Cl-Cl
|
||||
|
||||
bond_style harmonic
|
||||
bond_coeff 1 63.014 0.0
|
||||
bond_coeff 2 25.724 0.0
|
||||
|
||||
# ------------------------ Equilibration Run -------------------------------
|
||||
|
||||
reset_timestep 0
|
||||
|
||||
thermo 50
|
||||
thermo_style custom step etotal pe ke temp press epair evdwl ecoul elong ebond fnorm fmax vol
|
||||
|
||||
compute CSequ all temp/cs cores shells
|
||||
|
||||
# output via chunk method
|
||||
|
||||
#compute prop all property/atom i_CSID
|
||||
#compute cs_chunk all chunk/atom c_prop
|
||||
#compute cstherm all temp/chunk cs_chunk temp internal com yes cdof 3.0
|
||||
#fix ave_chunk all ave/time 100 1 100 c_cstherm file chunk.dump mode vector
|
||||
|
||||
thermo_modify temp CSequ
|
||||
|
||||
# velocity bias option
|
||||
|
||||
velocity all create 1427 134 dist gaussian mom yes rot no bias yes temp CSequ
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 10 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 22
|
||||
ghost atom cutoff = 22
|
||||
binsize = 11, bins = 3 3 3
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair born/coul/dsf/cs, half, perpetual
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/3d/newton
|
||||
bin: standard
|
||||
velocity all scale 1427 temp CSequ
|
||||
|
||||
fix thermoberendsen all temp/berendsen 1427 1427 0.4
|
||||
fix nve all nve
|
||||
fix_modify thermoberendsen temp CSequ
|
||||
|
||||
# 2 fmsec timestep
|
||||
|
||||
timestep 0.002
|
||||
run 500
|
||||
Memory usage per processor = 6.8559 Mbytes
|
||||
Step TotEng PotEng KinEng Temp Press E_pair E_vdwl E_coul E_long E_bond Fnorm Fmax Volume
|
||||
0 -635.80596 -675.46362 39.657659 1427 -21302.622 -675.46362 1.6320365 -677.09565 0 0 1.5814015e-14 3.2317898e-15 13990.5
|
||||
50 -634.07021 -666.11867 32.048452 1153.1982 -4560.945 -668.28236 37.756542 -706.0389 0 2.163691 13.802484 3.022372 13990.5
|
||||
100 -631.97128 -662.02544 30.054164 1081.4378 -3497.564 -664.61825 39.275003 -703.89325 0 2.5928078 13.956833 2.5417699 13990.5
|
||||
150 -630.14953 -663.04215 32.892622 1183.5739 -88.43828 -665.63444 46.239965 -711.87441 0 2.5922927 14.667898 2.4964255 13990.5
|
||||
200 -628.52878 -663.9795 35.45072 1275.6219 -1755.9004 -666.73564 41.758052 -708.49369 0 2.7561421 14.230743 3.0924004 13990.5
|
||||
250 -627.27102 -662.025 34.753978 1250.5511 -1234.0918 -665.13519 43.170874 -708.30606 0 3.1101887 14.221086 1.941354 13990.5
|
||||
300 -626.5495 -663.74287 37.193368 1338.3275 -2049.3444 -666.45574 40.476148 -706.93188 0 2.7128711 13.330425 1.7756755 13990.5
|
||||
350 -625.87313 -665.21855 39.345421 1415.7647 -1543.1723 -667.90872 41.577366 -709.48609 0 2.6901682 13.541311 1.854662 13990.5
|
||||
400 -625.09344 -661.26404 36.1706 1301.5253 -729.96729 -664.10334 43.468765 -707.57211 0 2.8392963 13.663555 1.9067551 13990.5
|
||||
450 -624.46214 -660.01362 35.551477 1279.2474 -1617.7158 -663.06571 41.644856 -704.71057 0 3.0520921 14.527005 1.7280213 13990.5
|
||||
500 -623.49246 -659.2527 35.76024 1286.7593 -935.99238 -662.32953 43.038808 -705.36834 0 3.0768302 14.099593 1.9831106 13990.5
|
||||
Loop time of 4.09864 on 4 procs for 500 steps with 432 atoms
|
||||
|
||||
Performance: 21.080 ns/day, 1.139 hours/ns, 121.992 timesteps/s
|
||||
99.7% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 3.3804 | 3.568 | 3.8354 | 8.9 | 87.05
|
||||
Bond | 0.00074339 | 0.00079519 | 0.00087976 | 0.0 | 0.02
|
||||
Neigh | 0.045851 | 0.046084 | 0.046361 | 0.1 | 1.12
|
||||
Comm | 0.20413 | 0.47123 | 0.65875 | 24.3 | 11.50
|
||||
Output | 0.00044298 | 0.00046057 | 0.00051165 | 0.0 | 0.01
|
||||
Modify | 0.0064909 | 0.0067219 | 0.0069766 | 0.2 | 0.16
|
||||
Other | | 0.005345 | | | 0.13
|
||||
|
||||
Nlocal: 108 ave 114 max 105 min
|
||||
Histogram: 1 1 1 0 0 0 0 0 0 1
|
||||
Nghost: 6527 ave 6599 max 6472 min
|
||||
Histogram: 1 0 1 0 1 0 0 0 0 1
|
||||
Neighs: 74388.2 ave 75855 max 73680 min
|
||||
Histogram: 1 2 0 0 0 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 297553
|
||||
Ave neighs/atom = 688.78
|
||||
Ave special neighs/atom = 1
|
||||
Neighbor list builds = 20
|
||||
Dangerous builds = 0
|
||||
|
||||
unfix thermoberendsen
|
||||
|
||||
# ------------------------ Dynamic Run -------------------------------
|
||||
|
||||
run 1000
|
||||
Memory usage per processor = 6.85787 Mbytes
|
||||
Step TotEng PotEng KinEng Temp Press E_pair E_vdwl E_coul E_long E_bond Fnorm Fmax Volume
|
||||
500 -623.49319 -659.2527 35.759511 1286.7331 -936.04802 -662.32953 43.038808 -705.36834 0 3.0768302 14.099593 1.9831106 13990.5
|
||||
550 -623.44059 -663.57938 40.138795 1444.3127 -935.73484 -666.2789 42.563337 -708.84224 0 2.6995167 13.918509 2.3189805 13990.5
|
||||
600 -623.4703 -660.01592 36.545618 1315.0196 1327.3492 -663.08845 47.985462 -711.07391 0 3.0725254 15.192713 2.4098428 13990.5
|
||||
650 -623.46796 -661.56776 38.099807 1370.9439 457.82439 -664.81976 45.495622 -710.31538 0 3.2519966 15.026057 1.8500226 13990.5
|
||||
700 -623.50158 -659.5131 36.011523 1295.8012 -460.03772 -663.1078 43.938203 -707.046 0 3.5946908 14.660979 2.4825518 13990.5
|
||||
750 -623.44787 -661.93353 38.485658 1384.8279 97.429626 -664.9551 45.083146 -710.03825 0 3.0215753 15.10043 2.3433897 13990.5
|
||||
800 -623.48215 -659.50655 36.024402 1296.2647 1097.3866 -662.61124 47.251998 -709.86324 0 3.1046914 14.556382 2.0543766 13990.5
|
||||
850 -623.45868 -661.13782 37.679134 1355.8068 -1802.1624 -664.41257 40.70845 -705.12102 0 3.2747525 14.691444 2.2054332 13990.5
|
||||
900 -623.43556 -663.59137 40.155815 1444.9251 534.99197 -666.71877 45.601619 -712.32039 0 3.127395 14.741411 2.5807895 13990.5
|
||||
950 -623.51318 -661.57916 38.06598 1369.7267 -678.12625 -664.37535 43.207862 -707.58322 0 2.7961988 14.430307 2.3936105 13990.5
|
||||
1000 -623.47287 -661.22274 37.749874 1358.3523 634.7979 -664.42973 46.373361 -710.80309 0 3.2069879 15.891192 2.4042765 13990.5
|
||||
1050 -623.48133 -661.52868 38.047347 1369.0562 -583.15228 -664.6098 43.618772 -708.22857 0 3.081116 14.806856 2.3447613 13990.5
|
||||
1100 -623.47867 -661.83761 38.358946 1380.2685 -868.9779 -664.8826 42.84846 -707.73106 0 3.044983 14.69567 2.399143 13990.5
|
||||
1150 -623.44713 -661.21299 37.765857 1358.9274 405.14554 -664.09567 45.578739 -709.6744 0 2.8826753 15.437367 3.1381305 13990.5
|
||||
1200 -623.46549 -660.91706 37.451568 1347.6183 699.78996 -664.0883 46.36297 -710.45127 0 3.1712473 15.109665 1.8891886 13990.5
|
||||
1250 -623.49296 -658.2218 34.728838 1249.6464 1061.0154 -661.29052 47.668699 -708.95922 0 3.0687228 14.901367 2.3964137 13990.5
|
||||
1300 -623.49837 -660.91022 37.411844 1346.1889 226.99512 -664.35989 45.352287 -709.71217 0 3.4496704 15.161542 2.2137993 13990.5
|
||||
1350 -623.46718 -658.80365 35.336469 1271.5108 1039.6469 -662.16908 47.565671 -709.73475 0 3.3654314 15.892516 2.7888426 13990.5
|
||||
1400 -623.47124 -661.45375 37.982513 1366.7233 -379.56023 -664.6321 43.788306 -708.42041 0 3.1783497 14.251126 1.7415409 13990.5
|
||||
1450 -623.46671 -660.17518 36.708464 1320.8792 -374.37056 -662.92706 44.083648 -707.01071 0 2.7518803 15.210167 1.9984277 13990.5
|
||||
1500 -623.50515 -659.06488 35.559725 1279.5442 260.37822 -662.39548 45.779764 -708.17524 0 3.3306005 14.682396 2.4201107 13990.5
|
||||
Loop time of 8.26746 on 4 procs for 1000 steps with 432 atoms
|
||||
|
||||
Performance: 20.901 ns/day, 1.148 hours/ns, 120.956 timesteps/s
|
||||
99.7% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 6.706 | 7.1568 | 7.6597 | 12.7 | 86.57
|
||||
Bond | 0.0014617 | 0.0015531 | 0.0016506 | 0.2 | 0.02
|
||||
Neigh | 0.10511 | 0.10522 | 0.10532 | 0.0 | 1.27
|
||||
Comm | 0.48547 | 0.98841 | 1.4393 | 34.0 | 11.96
|
||||
Output | 0.0012085 | 0.0012462 | 0.0013196 | 0.1 | 0.02
|
||||
Modify | 0.0021446 | 0.0021989 | 0.0022545 | 0.1 | 0.03
|
||||
Other | | 0.01204 | | | 0.15
|
||||
|
||||
Nlocal: 108 ave 114 max 94 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 1 2
|
||||
Nghost: 6512.25 ave 6586 max 6456 min
|
||||
Histogram: 1 0 0 2 0 0 0 0 0 1
|
||||
Neighs: 74248.2 ave 77441 max 65858 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 3
|
||||
|
||||
Total # of neighbors = 296993
|
||||
Ave neighs/atom = 687.484
|
||||
Ave special neighs/atom = 1
|
||||
Neighbor list builds = 46
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:12
|
||||
5008
examples/snap/He_He_JW2013.table
Normal file
5008
examples/snap/He_He_JW2013.table
Normal file
File diff suppressed because it is too large
Load Diff
17
examples/snap/W.SNAP_HePair.pot
Normal file
17
examples/snap/W.SNAP_HePair.pot
Normal file
@ -0,0 +1,17 @@
|
||||
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017, W-He and He-He from Juslin, N. and Wirth, B. D. Journal of Nuclear Materials, 432, (2013) p61-66
|
||||
#
|
||||
# Definition of SNAP+ZBL+Tabulated potential.
|
||||
variable zblcutinner equal 4
|
||||
variable zblcutouter equal 4.8
|
||||
variable zblz equal 74
|
||||
|
||||
# Specify hybrid/overlay with ZBL, SNAP, and tabulated He-He and W-He pair potentials
|
||||
|
||||
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} snap table spline 10000 table spline 10000
|
||||
pair_coeff 1 1 zbl ${zblz} ${zblz}
|
||||
pair_coeff * * snap W_2940_2017_2.snapcoeff W W_2940_2017_2.snapparam W NULL
|
||||
pair_coeff 2 2 table 1 He_He_JW2013.table HeHe
|
||||
pair_coeff 1 2 table 2 W_He_JW2013.table WHe
|
||||
#Hybrid/overlay will take all pair styles and add their contributions equally; the order of the pair_coeff commands doesn't matter here
|
||||
#This is not the case for pair_style hybrid ... where only one pair_coeff is read for each type combination, order matters here.
|
||||
|
||||
16
examples/snap/W_2940_2017_2.pot.snap
Normal file
16
examples/snap/W_2940_2017_2.pot.snap
Normal file
@ -0,0 +1,16 @@
|
||||
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017
|
||||
#
|
||||
# Definition of SNAP+ZBL potential.
|
||||
variable zblcutinner equal 4
|
||||
variable zblcutouter equal 4.8
|
||||
variable zblz equal 74
|
||||
|
||||
# Specify hybrid/overlay with ZBL and SNAP
|
||||
|
||||
pair_style hybrid/overlay &
|
||||
zbl ${zblcutinner} ${zblcutouter} &
|
||||
snap
|
||||
pair_coeff 1 1 zbl ${zblz} ${zblz}
|
||||
pair_coeff * * snap W_2940_2017_2.snapcoeff W W_2940_2017_2.snapparam W
|
||||
|
||||
#Nomenclature of the snap files is Element_DakotaID_Year_Month
|
||||
62
examples/snap/W_2940_2017_2.snapcoeff
Normal file
62
examples/snap/W_2940_2017_2.snapcoeff
Normal file
@ -0,0 +1,62 @@
|
||||
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017
|
||||
#
|
||||
# LAMMPS SNAP coefficients for W
|
||||
|
||||
1 56
|
||||
W 0.5 1
|
||||
0.781170857801
|
||||
-0.001794941735
|
||||
-0.016628679036
|
||||
-0.066625537037
|
||||
-0.073716343967
|
||||
-0.062913923923
|
||||
0.032552694672
|
||||
-0.134901744419
|
||||
-0.075076334103
|
||||
-0.148558616547
|
||||
-0.140808831101
|
||||
-0.166749145704
|
||||
-0.047487675984
|
||||
-0.049892090603
|
||||
-0.032483739965
|
||||
-0.114766534860
|
||||
-0.106759718242
|
||||
-0.125894850485
|
||||
-0.103409735225
|
||||
-0.095247335447
|
||||
-0.061998736346
|
||||
-0.053895610976
|
||||
-0.010799734206
|
||||
-0.011644828900
|
||||
-0.028316826924
|
||||
0.011176085541
|
||||
0.064619474684
|
||||
-0.023886279996
|
||||
-0.004099224312
|
||||
-0.056084222496
|
||||
-0.035551497650
|
||||
-0.056678501024
|
||||
-0.004905851656
|
||||
-0.015701146162
|
||||
-0.008462280779
|
||||
0.016429018676
|
||||
0.032432633993
|
||||
-0.010805361272
|
||||
-0.014841893457
|
||||
0.019414134562
|
||||
-0.008112452759
|
||||
-0.002700775447
|
||||
0.007032887063
|
||||
-0.009706065042
|
||||
0.008385967833
|
||||
0.028606085876
|
||||
-0.007003591067
|
||||
0.006467260152
|
||||
-0.006666986361
|
||||
0.029243285316
|
||||
0.002477673872
|
||||
-0.000199497504
|
||||
0.004068954075
|
||||
0.006036129972
|
||||
-0.013010633924
|
||||
-0.008314173699
|
||||
12
examples/snap/W_2940_2017_2.snapparam
Normal file
12
examples/snap/W_2940_2017_2.snapparam
Normal file
@ -0,0 +1,12 @@
|
||||
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017
|
||||
#
|
||||
# required
|
||||
rcutfac 4.73442
|
||||
twojmax 8
|
||||
|
||||
# optional
|
||||
|
||||
gamma 1
|
||||
rfac0 0.99363
|
||||
rmin0 0
|
||||
diagonalstyle 3
|
||||
333
examples/snap/W_He_JW2013.table
Normal file
333
examples/snap/W_He_JW2013.table
Normal file
@ -0,0 +1,333 @@
|
||||
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Juslin, N. and Wirth, B. D. Journal of Nuclear Materials, 432, (2013) p61-66
|
||||
#
|
||||
#-> LAMMPS Potential File in tabular format. <-#
|
||||
# N. Juslin and B.D. Wirth, J. Nucl. Mater. 432: 61-66 (2013). #
|
||||
# Implemented by K.D. Hammond (2013) karlh@utk.edu
|
||||
WHe
|
||||
N 325
|
||||
|
||||
1 0.000001 193967.941432641 21165142.7035409
|
||||
2 0.02 88781.7058271842 5199760.86773195
|
||||
3 0.03 54461.7226844158 2255783.27652381
|
||||
4 0.04 37754.4525147374 1233285.07338825
|
||||
5 0.05 28027.0551422937 765154.791685769
|
||||
6 0.06 21746.7711436002 514318.047209743
|
||||
7 0.07 17407.2122555268 365457.111099578
|
||||
8 0.08 14260.380890756 270524.172812057
|
||||
9 0.09 11894.315375388 206649.630589727
|
||||
10 0.1 10064.3593879466 161847.200920021
|
||||
11 0.11 8616.61401638085 129358.142417815
|
||||
12 0.12 7449.7679909506 105145.917230655
|
||||
13 0.13 6494.61875555217 86685.1834957916
|
||||
14 0.14 5702.42313976808 72334.2424211515
|
||||
15 0.15 5037.94580316996 60990.8171175998
|
||||
16 0.16 4475.14199778904 51894.0939422829
|
||||
17 0.17 3994.38401078788 44506.229619365
|
||||
18 0.18 3580.62666819885 38438.9489608325
|
||||
19 0.19 3222.16279673461 33406.7013437966
|
||||
20 0.2 2909.75986785247 29195.976537891
|
||||
21 0.21 2636.04908399207 25644.7364933058
|
||||
22 0.22 2395.08534986163 22628.3414553063
|
||||
23 0.23 2182.02521022247 20049.740107058
|
||||
24 0.24 1992.88767473625 17832.5164484929
|
||||
25 0.25 1824.374223221 15915.8858113167
|
||||
26 0.26 1673.73168801722 14251.0430039002
|
||||
27 0.27 1538.64662154286 12798.4627755747
|
||||
28 0.28 1417.16307190682 11525.8804272463
|
||||
29 0.29 1307.61796220838 10406.7644777657
|
||||
30 0.3 1208.58985019291 9419.14958758174
|
||||
31 0.31 1118.8579596074 8544.73619272459
|
||||
32 0.32 1037.36917029033 7768.18965475981
|
||||
33 0.33 963.211228625566 7076.59012069084
|
||||
34 0.34 895.590859433663 6458.9972713357
|
||||
35 0.35 833.815769676712 5906.10340798657
|
||||
36 0.36 777.27976462076 5409.9550164674
|
||||
37 0.37 725.450370074705 4963.72782166695
|
||||
38 0.38 677.858485369477 4561.54392990095
|
||||
39 0.39 634.089691821654 4198.3223154716
|
||||
40 0.4 593.776918443069 3869.65589682796
|
||||
41 0.41 556.594226358163 3571.70994736531
|
||||
42 0.42 522.251519987548 3301.13772506169
|
||||
43 0.43 490.490029664824 3055.01007668961
|
||||
44 0.44 461.07843929567 2830.75644369566
|
||||
45 0.45 433.809555685745 2626.11521737135
|
||||
46 0.46 408.497434574275 2439.09179705296
|
||||
47 0.47 384.974893215623 2267.92302383156
|
||||
48 0.48 363.091351319355 2111.04691385653
|
||||
49 0.49 342.710951882019 1967.07681501168
|
||||
50 0.5 323.710921379843 1834.77927007332
|
||||
51 0.51 305.980135298364 1713.05499722693
|
||||
52 0.52 289.417860333203 1600.9225017701
|
||||
53 0.53 273.932649026687 1497.50391617084
|
||||
54 0.54 259.44136628296 1402.0127334168
|
||||
55 0.55 245.86833026895 1313.74315392891
|
||||
56 0.56 233.144552771494 1232.06081168718
|
||||
57 0.57 221.207066231741 1156.3946825672
|
||||
58 0.58 209.998326488761 1086.23000874818
|
||||
59 0.59 199.465681793803 1021.10209864685
|
||||
60 0.6 189.560899952328 960.590883129588
|
||||
61 0.61 180.239746551915 904.316126537919
|
||||
62 0.62 171.461608172096 851.933205957983
|
||||
63 0.63 163.189155273495 803.129384679904
|
||||
64 0.64 155.388040149985 757.62051633995
|
||||
65 0.65 148.026625916815 715.148125151611
|
||||
66 0.66 141.075743014823 675.476815185188
|
||||
67 0.67 134.508470148273 638.391968073426
|
||||
68 0.68 128.299936952045 603.697693987706
|
||||
69 0.69 122.427146011509 571.215005397664
|
||||
70 0.7 116.868812142757 540.780187122909
|
||||
71 0.71 111.605217088269 512.243339613345
|
||||
72 0.72 106.618077998551 485.467075341772
|
||||
73 0.73 101.890428258492 460.32535073159
|
||||
74 0.74 97.4065093817089 436.702418234594
|
||||
75 0.75 93.1516728402801 414.491885070093
|
||||
76 0.76 89.1122908237631 393.595866780115
|
||||
77 0.77 85.2756750325343 373.92422518237
|
||||
78 0.78 81.6300027083206 355.393881543952
|
||||
79 0.79 78.1642491910339 337.928196880203
|
||||
80 0.8 74.8681263671512 321.456412227117
|
||||
81 0.81 71.7320264421784 305.913142560798
|
||||
82 0.82 68.7469705293107 291.237918759986
|
||||
83 0.83 65.9045615992059 277.374772641144
|
||||
84 0.84 63.1969413826496 264.271860652001
|
||||
85 0.85 60.6167508595336 251.881122298665
|
||||
86 0.86 58.1570940046202 240.157969812335
|
||||
87 0.87 55.8115044935616 229.061005941579
|
||||
88 0.88 53.5739151020755 218.551767091662
|
||||
89 0.89 51.4386295574478 208.594489329042
|
||||
90 0.9 49.400296625012 199.15589503172
|
||||
91 0.91 47.4538862332645 190.204998198848
|
||||
92 0.92 45.5946674600827 181.712926639528
|
||||
93 0.93 43.818188219384 173.652759444123
|
||||
94 0.94 42.1202565026948 165.999378304597
|
||||
95 0.95 40.4969230436942 158.729331395598
|
||||
96 0.96 38.944465286023 151.820708657476
|
||||
97 0.97 37.4593725456438 145.253027437895
|
||||
98 0.98 36.03833226895 139.00712755184
|
||||
99 0.99 34.6782172967515 133.065074912029
|
||||
100 1 33.3760740523261 127.410072964226
|
||||
101 1.01 32.1291115790037 122.026381235854
|
||||
102 1.02 30.9346913593292 116.899240372539
|
||||
103 1.03 29.7903178538011 112.014803096661
|
||||
104 1.04 28.6936297025718 107.360070575369
|
||||
105 1.05 27.6423915383737 102.922833733498
|
||||
106 1.06 26.6344863633652 98.69161908997
|
||||
107 1.07 25.6679084466039 94.6556387351718
|
||||
108 1.08 24.7407567025028 90.804744101767
|
||||
109 1.09 23.8272933774771 87.0424919424945
|
||||
110 1.1 22.9746938084833 83.501083250349
|
||||
111 1.11 22.1568053467682 80.0995862713648
|
||||
112 1.12 21.372249301539 76.8339257499533
|
||||
113 1.13 20.6196874341871 73.7000865054019
|
||||
114 1.14 19.8978213575361 70.6941134318681
|
||||
115 1.15 19.2053919350916 67.8121114983956
|
||||
116 1.16 18.5411786802966 65.0502457488983
|
||||
117 1.17 17.9039991557794 62.4047413021631
|
||||
118 1.18 17.2927083726064 59.8718833518619
|
||||
119 1.19 16.7061981895337 57.4480171665332
|
||||
120 1.2 16.1433967122567 55.1295480895992
|
||||
121 1.21 15.6032676926612 52.9129415393513
|
||||
122 1.22 15.0848099280771 50.7947230089626
|
||||
123 1.23 14.5870566605305 48.7714780664787
|
||||
124 1.24 14.1090749759875 46.8398523548244
|
||||
125 1.25 13.6499652036138 44.9965515917961
|
||||
126 1.26 13.2088603150229 43.2383415700692
|
||||
127 1.27 12.784925323525 41.5620481571964
|
||||
128 1.28 12.3773566833817 39.9645572956038
|
||||
129 1.29 11.9853816890583 38.4428150025965
|
||||
130 1.3 11.60825787447 36.9938273703497
|
||||
131 1.31 11.2452724122346 35.6146605659201
|
||||
132 1.32 10.8957415129272 34.3024408312399
|
||||
133 1.33 10.5590098243284 33.0543544831148
|
||||
134 1.34 10.234449830679 31.8676479132323
|
||||
135 1.35 9.92146125192414 30.739627588146
|
||||
136 1.36 9.61947044297006 29.6676600492951
|
||||
137 1.37 9.32792979293674 28.6491719129885
|
||||
138 1.38 9.04631712440562 27.6816498704143
|
||||
139 1.39 8.77413509267092 26.7626406876384
|
||||
140 1.4 8.51091058499253 25.8897512055996
|
||||
141 1.41 8.25619411984519 25.0606483401084
|
||||
142 1.42 8.00955924617449 24.2730590818669
|
||||
143 1.43 7.77060194264106 23.524770496429
|
||||
144 1.44 7.53894001687615 22.8136297242495
|
||||
145 1.45 7.31421250473534 22.1375439806457
|
||||
146 1.46 7.09607906954318 21.4944805558102
|
||||
147 1.47 6.88421940134788 20.8824668148159
|
||||
148 1.48 6.678332616175 20.2995901976151
|
||||
149 1.49 6.47813665527576 19.743998219024
|
||||
150 1.5 6.28336768437487 19.2138984687485
|
||||
151 1.51 6.09377949293196 18.7075586113629
|
||||
152 1.52 5.90914289338127 18.2233063863173
|
||||
153 1.53 5.72924512039265 17.759529607943
|
||||
154 1.54 5.55388923011117 17.3146761654411
|
||||
155 1.55 5.38289349942409 16.8872540228954
|
||||
156 1.56 5.21609082520001 16.4758312192575
|
||||
157 1.57 5.05332812353902 16.0790358683639
|
||||
158 1.58 4.89446572903546 15.6955561589207
|
||||
159 1.59 4.73937679401809 15.3241403545121
|
||||
160 1.6 4.58794668780786 14.9635967935988
|
||||
161 1.61 4.44007239596442 14.6127938895147
|
||||
162 1.62 4.29566191954302 14.2706601304803
|
||||
163 1.63 4.15463367433779 13.9361840795741
|
||||
164 1.64 4.01691589014092 13.6084143747644
|
||||
165 1.65 3.88244600998598 13.2864597288958
|
||||
166 1.66 3.75117008941481 12.9694889296766
|
||||
167 1.67 3.62304219570535 12.6567308397093
|
||||
168 1.68 3.49802380713948 12.3474743964548
|
||||
169 1.69 3.37608321225264 12.04106861226
|
||||
170 1.7 3.25719490908079 11.7369225743464
|
||||
171 1.71 3.14133900441368 11.4345054448136
|
||||
172 1.72 3.02850061304264 11.1333464606264
|
||||
173 1.73 2.91866925702027 10.8330349336425
|
||||
174 1.74 2.81183826489826 10.533220250582
|
||||
175 1.75 2.70800417099883 10.233611873045
|
||||
176 1.76 2.60716611463897 9.93397933750975
|
||||
177 1.77 2.50932523941003 9.63415225533254
|
||||
178 1.78 2.41448409240832 9.3340203127359
|
||||
179 1.79 2.3226460234938 9.03353327083141
|
||||
180 1.8 2.23381458454253 8.732700965596
|
||||
181 1.81 2.14799292869986 8.43159330788831
|
||||
182 1.82 2.06518320961743 8.13034028344418
|
||||
183 1.83 1.98538598072548 7.82913195286937
|
||||
184 1.84 1.90859959447164 7.52821845164772
|
||||
185 1.85 1.83481960157053 7.2279099901466
|
||||
186 1.86 1.7640381502589 6.92857685359786
|
||||
187 1.87 1.69624338555161 6.63064940211416
|
||||
188 1.88 1.63141884849222 6.33461807069079
|
||||
189 1.89 1.56954287538349 6.04103336918388
|
||||
190 1.9 1.51058799706618 5.75050588234672
|
||||
191 1.91 1.45452033816503 5.46370626978796
|
||||
192 1.92 1.40129901631838 5.18136526601165
|
||||
193 1.93 1.35087554145878 4.90427368037354
|
||||
194 1.94 1.30319321504351 4.63328239712837
|
||||
195 1.95 1.25818652931969 4.36930237539354
|
||||
196 1.96 1.21578056655756 4.11330464916909
|
||||
197 1.97 1.17589039832831 3.86632032733041
|
||||
198 1.98 1.13842048472736 3.62944059362462
|
||||
199 1.99 1.10326407363948 3.4038167066833
|
||||
200 2 1.07030260000113 3.19065999999702
|
||||
201 2.01 1.03940508502285 2.99124188195356
|
||||
202 2.02 1.01042753546074 2.80689383580147
|
||||
203 2.03 0.98321234287971 2.63900741967382
|
||||
204 2.04 0.957587682867711 2.48903426657353
|
||||
205 2.05 0.93336691431989 2.35848608439528
|
||||
206 2.06 0.910347978676441 2.24893465587638
|
||||
207 2.07 0.888312799175878 2.16201183866769
|
||||
208 2.08 0.867026680100139 2.09940956527362
|
||||
209 2.09 0.846311972535292 2.0501194750481
|
||||
210 2.1 0.826052090711088 2.00205155230487
|
||||
211 2.11 0.806267097861257 1.95513674544071
|
||||
212 2.12 0.786945609506685 1.90934585391834
|
||||
213 2.13 0.7680765291529 1.86465047725327
|
||||
214 2.14 0.749649040410966 1.82102299086693
|
||||
215 2.15 0.731652599355765 1.77843652275056
|
||||
216 2.16 0.714076927113717 1.73686493090964
|
||||
217 2.17 0.696912002672274 1.69628278155967
|
||||
218 2.18 0.680148055903811 1.65666532804561
|
||||
219 2.19 0.663775560796824 1.61798849045792
|
||||
220 2.2 0.647785228887586 1.5802288359199
|
||||
221 2.21 0.632168002885678 1.54336355952145
|
||||
222 2.22 0.616915050487049 1.50737046587586
|
||||
223 2.23 0.602017758368489 1.47222795127685
|
||||
224 2.24 0.587467726357617 1.43791498643425
|
||||
225 2.25 0.573256761772716 1.40441109976729
|
||||
226 2.26 0.559376873926913 1.37169636123572
|
||||
227 2.27 0.54582026879146 1.33975136668933
|
||||
228 2.28 0.532579343812991 1.30855722271748
|
||||
229 2.29 0.519646682879861 1.27809553198093
|
||||
230 2.3 0.507015051432831 1.24834837900889
|
||||
231 2.31 0.494677391715517 1.21929831644497
|
||||
232 2.32 0.482626818160208 1.19092835172618
|
||||
233 2.33 0.470856612904789 1.16322193418007
|
||||
234 2.34 0.459360221436662 1.1361629425252
|
||||
235 2.35 0.448131248359705 1.10973567276124
|
||||
236 2.36 0.437163453280438 1.08392482643501
|
||||
237 2.37 0.426450746809703 1.05871549926971
|
||||
238 2.38 0.415987186676291 1.0340931701448
|
||||
239 2.39 0.405766973949056 1.01004369041464
|
||||
240 2.4 0.395784449364215 0.986553273554313
|
||||
241 2.41 0.38603408975458 0.963608485121541
|
||||
242 2.42 0.376510504577652 0.941196233024116
|
||||
243 2.43 0.367208432539548 0.919303758082476
|
||||
244 2.44 0.358122738311864 0.897918624877599
|
||||
245 2.45 0.349248409338683 0.877028712874665
|
||||
246 2.46 0.340580552730998 0.85662220781334
|
||||
247 2.47 0.332114392245936 0.83668759335581
|
||||
248 2.48 0.323845265348255 0.8172136429841
|
||||
249 2.49 0.315768620351645 0.798189412138432
|
||||
250 2.5 0.307880013637488 0.779604230588763
|
||||
251 2.51 0.300175106948751 0.761447695031842
|
||||
252 2.52 0.292649664756829 0.743709661906479
|
||||
253 2.53 0.285299551699157 0.726380240419923
|
||||
254 2.54 0.278120730085547 0.70944978577854
|
||||
255 2.55 0.271109257471214 0.69290889261619
|
||||
256 2.56 0.264149883217677 0.704309455966822
|
||||
257 2.57 0.257023670963235 0.720548414483017
|
||||
258 2.58 0.249746544950963 0.73449858053557
|
||||
259 2.59 0.242341191411443 0.746201328856265
|
||||
260 2.6 0.234829852474346 0.755704032488528
|
||||
261 2.61 0.227234268357011 0.763059628544665
|
||||
262 2.62 0.219575623890343 0.768326185205915
|
||||
263 2.63 0.211874499363033 0.771566471270733
|
||||
264 2.64 0.204150825652343 0.772847529495918
|
||||
265 2.65 0.196423843597567 0.772240254912945
|
||||
266 2.66 0.188712067560753 0.769818979238484
|
||||
267 2.67 0.181033253108439 0.765661062433794
|
||||
268 2.68 0.17340436873789 0.759846492402718
|
||||
269 2.69 0.165841571561793 0.752457493752577
|
||||
270 2.7 0.158360186856416 0.743578146476553
|
||||
271 2.71 0.150974691370005 0.733294015350452
|
||||
272 2.72 0.14369870028059 0.721691790771177
|
||||
273 2.73 0.136544957685405 0.708858941699004
|
||||
274 2.74 0.129525330497857 0.69488338130108
|
||||
275 2.75 0.122650805622294 0.679853145829612
|
||||
276 2.76 0.115931490271846 0.663856087205069
|
||||
277 2.77 0.10937661529017 0.646979579712699
|
||||
278 2.78 0.102994541334199 0.629310241159726
|
||||
279 2.79 0.0967927677717845 0.610933668781077
|
||||
280 2.8 0.0907779441455664 0.591934190123306
|
||||
281 2.81 0.0849558840523623 0.572394629079895
|
||||
282 2.82 0.0793315812859323 0.552396087196204
|
||||
283 2.83 0.0739092280900552 0.532017740309284
|
||||
284 2.84 0.0686922353684514 0.51133665053656
|
||||
285 2.85 0.0636832546982004 0.490427593578163
|
||||
286 2.86 0.058884201993894 0.469362901250494
|
||||
287 2.87 0.0542962826708072 0.448212319123508
|
||||
288 2.88 0.0499200181568738 0.427042879091302
|
||||
289 2.89 0.04575527360515 0.405918786664859
|
||||
290 2.9 0.0418012866607671 0.384901322737369
|
||||
291 2.91 0.0380566971390415 0.364048759536354
|
||||
292 2.92 0.0345195774744505 0.343416290443013
|
||||
293 2.93 0.0311874638035265 0.323055973327595
|
||||
294 2.94 0.0280573875483925 0.303016687020528
|
||||
295 2.95 0.0251259073715924 0.283344100512051
|
||||
296 2.96 0.0223891413770724 0.264080654448684
|
||||
297 2.97 0.0198427994365954 0.245265554472591
|
||||
298 2.98 0.017482215525515 0.226934775930029
|
||||
299 2.99 0.0153023799566589 0.20912107945741
|
||||
300 3 0.0132979714060716 0.191854036938167
|
||||
301 3.01 0.0114633886294948 0.175160067310366
|
||||
302 3.02 0.009792781773727 0.159062481694054
|
||||
303 3.03 0.00828008319235769 0.143581537298372
|
||||
304 3.04 0.00691903768080766 0.128734499561665
|
||||
305 3.05 0.00570323205109961 0.114535711972949
|
||||
306 3.06 0.0046261239723178 0.100996673020243
|
||||
307 3.07 0.00368107000826405 0.088126119710232
|
||||
308 3.08 0.00286135278937321 0.0759301171045549
|
||||
309 3.09 0.00216020726148435 0.0644121533205429
|
||||
310 3.1 0.00157084595956787 0.0535732394484653
|
||||
311 3.11 0.00108648325995938 0.0434120138431347
|
||||
312 3.12 0.000700358570037423 0.0339248502550448
|
||||
313 3.13 0.000405758419588332 0.0251059692749879
|
||||
314 3.14 0.000196037423311657 0.0169475525761798
|
||||
315 3.15 6.46380890244662E-05 0.00943985944932857
|
||||
316 3.16 5.1094511057313E-06 0.00257134513861945
|
||||
317 3.17 0 0
|
||||
318 3.18 0 0
|
||||
319 3.19 0 0
|
||||
320 3.2 0 0
|
||||
321 3.21 0 0
|
||||
322 3.22 0 0
|
||||
323 3.23 0 0
|
||||
324 3.24 0 0
|
||||
325 3.25 0 0
|
||||
45
examples/snap/in.snap.W.2940
Normal file
45
examples/snap/in.snap.W.2940
Normal file
@ -0,0 +1,45 @@
|
||||
# Demonstrate SNAP W potential
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
variable nsteps index 100
|
||||
variable nrep equal 4
|
||||
variable a equal 3.1803
|
||||
units metal
|
||||
|
||||
# generate the box and atom positions using a BCC lattice
|
||||
|
||||
variable nx equal ${nrep}
|
||||
variable ny equal ${nrep}
|
||||
variable nz equal ${nrep}
|
||||
|
||||
boundary p p p
|
||||
|
||||
lattice bcc $a
|
||||
region box block 0 ${nx} 0 ${ny} 0 ${nz}
|
||||
create_box 1 box
|
||||
create_atoms 1 box
|
||||
|
||||
mass 1 183.84
|
||||
|
||||
# choose potential
|
||||
|
||||
include W_2940_2017_2.pot.snap
|
||||
|
||||
# Setup output
|
||||
|
||||
thermo 10
|
||||
thermo_modify norm yes
|
||||
|
||||
# Set up NVE run
|
||||
|
||||
timestep 0.5e-3
|
||||
neighbor 1.0 bin
|
||||
neigh_modify once no every 1 delay 0 check yes
|
||||
|
||||
# Run MD
|
||||
|
||||
velocity all create 300.0 4928459
|
||||
fix 1 all nve
|
||||
run ${nsteps}
|
||||
|
||||
48
examples/snap/in.snap.hybrid.WSNAP.HePair
Normal file
48
examples/snap/in.snap.hybrid.WSNAP.HePair
Normal file
@ -0,0 +1,48 @@
|
||||
# Demonstrate SNAP W potential combined with He pair potentials
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
variable nsteps index 100
|
||||
variable nrep equal 4
|
||||
variable a equal 3.1803
|
||||
units metal
|
||||
|
||||
# generate the box and atom positions using a BCC lattice
|
||||
|
||||
variable nx equal ${nrep}
|
||||
variable ny equal ${nrep}
|
||||
variable nz equal ${nrep}
|
||||
|
||||
boundary p p p
|
||||
|
||||
lattice bcc $a
|
||||
region box block 0 ${nx} 0 ${ny} 0 ${nz}
|
||||
create_box 2 box
|
||||
create_atoms 1 box
|
||||
mass 1 183.84
|
||||
mass 2 4.0026
|
||||
|
||||
set group all type/fraction 2 0.05 3590153 # Change 5% of W to He
|
||||
group tungsten type 1
|
||||
group helium type 2
|
||||
# choose potential
|
||||
|
||||
include W.SNAP_HePair.pot
|
||||
|
||||
# Setup output
|
||||
|
||||
thermo 10
|
||||
thermo_modify norm yes
|
||||
|
||||
# Set up NVE run
|
||||
|
||||
timestep 0.5e-3
|
||||
neighbor 1.0 bin
|
||||
neigh_modify once no every 1 delay 0 check yes
|
||||
|
||||
# Run MD
|
||||
|
||||
velocity all create 300.0 4928459
|
||||
fix 1 all nve
|
||||
run ${nsteps}
|
||||
|
||||
144
examples/snap/log.21Feb17.snap.W.2940.g++.1
Normal file
144
examples/snap/log.21Feb17.snap.W.2940.g++.1
Normal file
@ -0,0 +1,144 @@
|
||||
LAMMPS (13 Feb 2017)
|
||||
# Demonstrate SNAP Ta potential
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
variable nsteps index 100
|
||||
variable nrep equal 4
|
||||
variable a equal 3.1803
|
||||
units metal
|
||||
|
||||
# generate the box and atom positions using a BCC lattice
|
||||
|
||||
variable nx equal ${nrep}
|
||||
variable nx equal 4
|
||||
variable ny equal ${nrep}
|
||||
variable ny equal 4
|
||||
variable nz equal ${nrep}
|
||||
variable nz equal 4
|
||||
|
||||
boundary p p p
|
||||
|
||||
lattice bcc $a
|
||||
lattice bcc 3.1803
|
||||
Lattice spacing in x,y,z = 3.1803 3.1803 3.1803
|
||||
region box block 0 ${nx} 0 ${ny} 0 ${nz}
|
||||
region box block 0 4 0 ${ny} 0 ${nz}
|
||||
region box block 0 4 0 4 0 ${nz}
|
||||
region box block 0 4 0 4 0 4
|
||||
create_box 1 box
|
||||
Created orthogonal box = (0 0 0) to (12.7212 12.7212 12.7212)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 128 atoms
|
||||
|
||||
mass 1 183.84
|
||||
|
||||
# choose potential
|
||||
|
||||
include W_2940_2017_2.pot.snap
|
||||
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017
|
||||
#
|
||||
# Definition of SNAP+ZBL potential.
|
||||
variable zblcutinner equal 4
|
||||
variable zblcutouter equal 4.8
|
||||
variable zblz equal 74
|
||||
|
||||
# Specify hybrid with SNAP, ZBL, and long-range Coulomb
|
||||
|
||||
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} snap
|
||||
pair_style hybrid/overlay zbl 4 ${zblcutouter} snap
|
||||
pair_style hybrid/overlay zbl 4 4.8 snap
|
||||
pair_coeff 1 1 zbl ${zblz} ${zblz}
|
||||
pair_coeff 1 1 zbl 74 ${zblz}
|
||||
pair_coeff 1 1 zbl 74 74
|
||||
pair_coeff * * snap W_2940_2017_2.snapcoeff W W_2940_2017_2.snapparam W
|
||||
Reading potential file W_2940_2017_2.snapcoeff with DATE: 2017-02-20
|
||||
SNAP Element = W, Radius 0.5, Weight 1
|
||||
Reading potential file W_2940_2017_2.snapparam with DATE: 2017-02-20
|
||||
SNAP keyword rcutfac 4.73442
|
||||
SNAP keyword twojmax 8
|
||||
SNAP keyword gamma 1
|
||||
SNAP keyword rfac0 0.99363
|
||||
SNAP keyword rmin0 0
|
||||
SNAP keyword diagonalstyle 3
|
||||
|
||||
#Nomenclature on the snap files are Element_DakotaID_Year_Month
|
||||
|
||||
# Setup output
|
||||
|
||||
thermo 10
|
||||
thermo_modify norm yes
|
||||
|
||||
# Set up NVE run
|
||||
|
||||
timestep 0.5e-3
|
||||
neighbor 1.0 bin
|
||||
neigh_modify once no every 1 delay 0 check yes
|
||||
|
||||
# Run MD
|
||||
|
||||
velocity all create 300.0 4928459
|
||||
fix 1 all nve
|
||||
run ${nsteps}
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 5.8
|
||||
ghost atom cutoff = 5.8
|
||||
binsize = 2.9, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair zbl, perpetual, half/full from (2)
|
||||
attributes: half, newton on
|
||||
pair build: halffull/newton
|
||||
stencil: none
|
||||
bin: none
|
||||
(2) pair snap, perpetual
|
||||
attributes: full, newton on
|
||||
pair build: full/bin/atomonly
|
||||
stencil: full/bin/3d
|
||||
bin: standard
|
||||
Memory usage per processor = 5.14696 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 300 -11.028325 0 -10.98985 3010.497
|
||||
10 293.40666 -11.027479 0 -10.989849 3246.0559
|
||||
20 274.27375 -11.025025 0 -10.989849 3927.9497
|
||||
30 244.50457 -11.021207 0 -10.989849 4983.5484
|
||||
40 207.0784 -11.016407 0 -10.989849 6299.9473
|
||||
50 165.74442 -11.011105 0 -10.989848 7736.5123
|
||||
60 124.62181 -11.005831 0 -10.989848 9140.8587
|
||||
70 87.744792 -11.001101 0 -10.989848 10366.489
|
||||
80 58.605244 -10.997364 0 -10.989848 11289.914
|
||||
90 39.754503 -10.994946 0 -10.989848 11824.945
|
||||
100 32.524085 -10.994019 0 -10.989848 11932.118
|
||||
Loop time of 11.8271 on 1 procs for 100 steps with 128 atoms
|
||||
|
||||
Performance: 0.365 ns/day, 65.706 hours/ns, 8.455 timesteps/s
|
||||
99.9% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 11.826 | 11.826 | 11.826 | 0.0 | 99.99
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.00044084 | 0.00044084 | 0.00044084 | 0.0 | 0.00
|
||||
Output | 0.00013232 | 0.00013232 | 0.00013232 | 0.0 | 0.00
|
||||
Modify | 0.00021887 | 0.00021887 | 0.00021887 | 0.0 | 0.00
|
||||
Other | | 0.0002718 | | | 0.00
|
||||
|
||||
Nlocal: 128 ave 128 max 128 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 727 ave 727 max 727 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 3712 ave 3712 max 3712 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
FullNghs: 7424 ave 7424 max 7424 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 7424
|
||||
Ave neighs/atom = 58
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
|
||||
Total wall time: 0:00:11
|
||||
144
examples/snap/log.21Feb17.snap.W.2940.g++.4
Normal file
144
examples/snap/log.21Feb17.snap.W.2940.g++.4
Normal file
@ -0,0 +1,144 @@
|
||||
LAMMPS (13 Feb 2017)
|
||||
# Demonstrate SNAP Ta potential
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
variable nsteps index 100
|
||||
variable nrep equal 4
|
||||
variable a equal 3.1803
|
||||
units metal
|
||||
|
||||
# generate the box and atom positions using a BCC lattice
|
||||
|
||||
variable nx equal ${nrep}
|
||||
variable nx equal 4
|
||||
variable ny equal ${nrep}
|
||||
variable ny equal 4
|
||||
variable nz equal ${nrep}
|
||||
variable nz equal 4
|
||||
|
||||
boundary p p p
|
||||
|
||||
lattice bcc $a
|
||||
lattice bcc 3.1803
|
||||
Lattice spacing in x,y,z = 3.1803 3.1803 3.1803
|
||||
region box block 0 ${nx} 0 ${ny} 0 ${nz}
|
||||
region box block 0 4 0 ${ny} 0 ${nz}
|
||||
region box block 0 4 0 4 0 ${nz}
|
||||
region box block 0 4 0 4 0 4
|
||||
create_box 1 box
|
||||
Created orthogonal box = (0 0 0) to (12.7212 12.7212 12.7212)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 128 atoms
|
||||
|
||||
mass 1 183.84
|
||||
|
||||
# choose potential
|
||||
|
||||
include W_2940_2017_2.pot.snap
|
||||
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017
|
||||
#
|
||||
# Definition of SNAP+ZBL potential.
|
||||
variable zblcutinner equal 4
|
||||
variable zblcutouter equal 4.8
|
||||
variable zblz equal 74
|
||||
|
||||
# Specify hybrid with SNAP, ZBL, and long-range Coulomb
|
||||
|
||||
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} snap
|
||||
pair_style hybrid/overlay zbl 4 ${zblcutouter} snap
|
||||
pair_style hybrid/overlay zbl 4 4.8 snap
|
||||
pair_coeff 1 1 zbl ${zblz} ${zblz}
|
||||
pair_coeff 1 1 zbl 74 ${zblz}
|
||||
pair_coeff 1 1 zbl 74 74
|
||||
pair_coeff * * snap W_2940_2017_2.snapcoeff W W_2940_2017_2.snapparam W
|
||||
Reading potential file W_2940_2017_2.snapcoeff with DATE: 2017-02-20
|
||||
SNAP Element = W, Radius 0.5, Weight 1
|
||||
Reading potential file W_2940_2017_2.snapparam with DATE: 2017-02-20
|
||||
SNAP keyword rcutfac 4.73442
|
||||
SNAP keyword twojmax 8
|
||||
SNAP keyword gamma 1
|
||||
SNAP keyword rfac0 0.99363
|
||||
SNAP keyword rmin0 0
|
||||
SNAP keyword diagonalstyle 3
|
||||
|
||||
#Nomenclature on the snap files are Element_DakotaID_Year_Month
|
||||
|
||||
# Setup output
|
||||
|
||||
thermo 10
|
||||
thermo_modify norm yes
|
||||
|
||||
# Set up NVE run
|
||||
|
||||
timestep 0.5e-3
|
||||
neighbor 1.0 bin
|
||||
neigh_modify once no every 1 delay 0 check yes
|
||||
|
||||
# Run MD
|
||||
|
||||
velocity all create 300.0 4928459
|
||||
fix 1 all nve
|
||||
run ${nsteps}
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 5.8
|
||||
ghost atom cutoff = 5.8
|
||||
binsize = 2.9, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair zbl, perpetual, half/full from (2)
|
||||
attributes: half, newton on
|
||||
pair build: halffull/newton
|
||||
stencil: none
|
||||
bin: none
|
||||
(2) pair snap, perpetual
|
||||
attributes: full, newton on
|
||||
pair build: full/bin/atomonly
|
||||
stencil: full/bin/3d
|
||||
bin: standard
|
||||
Memory usage per processor = 5.12833 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 300 -11.028325 0 -10.98985 3010.497
|
||||
10 293.22504 -11.027456 0 -10.989849 3258.275
|
||||
20 273.60084 -11.024939 0 -10.989849 3973.9038
|
||||
30 243.15327 -11.021034 0 -10.989849 5077.9172
|
||||
40 205.01905 -11.016142 0 -10.989849 6448.4941
|
||||
50 163.10914 -11.010767 0 -10.989848 7935.6835
|
||||
60 121.67854 -11.005453 0 -10.989848 9378.9959
|
||||
70 84.846972 -11.000729 0 -10.989848 10626.301
|
||||
80 56.127265 -10.997046 0 -10.989848 11551.687
|
||||
90 38.025013 -10.994724 0 -10.989847 12069.936
|
||||
100 31.768127 -10.993922 0 -10.989847 12145.648
|
||||
Loop time of 3.03545 on 4 procs for 100 steps with 128 atoms
|
||||
|
||||
Performance: 1.423 ns/day, 16.864 hours/ns, 32.944 timesteps/s
|
||||
99.9% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 2.9594 | 2.9866 | 3.0319 | 1.6 | 98.39
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.0024238 | 0.047825 | 0.075032 | 12.5 | 1.58
|
||||
Output | 0.00021601 | 0.00024045 | 0.00027442 | 0.0 | 0.01
|
||||
Modify | 9.6798e-05 | 0.00011188 | 0.00011802 | 0.0 | 0.00
|
||||
Other | | 0.000698 | | | 0.02
|
||||
|
||||
Nlocal: 32 ave 32 max 32 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 431 ave 431 max 431 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 928 ave 928 max 928 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
FullNghs: 1856 ave 1856 max 1856 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 7424
|
||||
Ave neighs/atom = 58
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
|
||||
Total wall time: 0:00:03
|
||||
179
examples/snap/log.21Feb17.snap.hybrid.WSNAP.HePair.g++.1
Normal file
179
examples/snap/log.21Feb17.snap.hybrid.WSNAP.HePair.g++.1
Normal file
@ -0,0 +1,179 @@
|
||||
LAMMPS (13 Feb 2017)
|
||||
# Demonstrate SNAP Ta potential
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
variable nsteps index 100
|
||||
variable nrep equal 4
|
||||
variable a equal 3.1803
|
||||
units metal
|
||||
|
||||
# generate the box and atom positions using a BCC lattice
|
||||
|
||||
variable nx equal ${nrep}
|
||||
variable nx equal 4
|
||||
variable ny equal ${nrep}
|
||||
variable ny equal 4
|
||||
variable nz equal ${nrep}
|
||||
variable nz equal 4
|
||||
|
||||
boundary p p p
|
||||
|
||||
lattice bcc $a
|
||||
lattice bcc 3.1803
|
||||
Lattice spacing in x,y,z = 3.1803 3.1803 3.1803
|
||||
region box block 0 ${nx} 0 ${ny} 0 ${nz}
|
||||
region box block 0 4 0 ${ny} 0 ${nz}
|
||||
region box block 0 4 0 4 0 ${nz}
|
||||
region box block 0 4 0 4 0 4
|
||||
create_box 2 box
|
||||
Created orthogonal box = (0 0 0) to (12.7212 12.7212 12.7212)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 128 atoms
|
||||
mass 1 183.84
|
||||
mass 2 4.0026
|
||||
|
||||
set group all type/fraction 2 0.05 3590153 # Change 5% of W to He
|
||||
5 settings made for type/fraction
|
||||
group tungsten type 1
|
||||
123 atoms in group tungsten
|
||||
group helium type 2
|
||||
5 atoms in group helium
|
||||
# choose potential
|
||||
|
||||
include W.SNAP_HePair.pot
|
||||
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017, W-He and He-He from Juslin, N. and Wirth, B. D. Journal of Nuclear Materials, 423, (2013) p61-63
|
||||
#
|
||||
# Definition of SNAP+ZBL+Tabulated potential.
|
||||
variable zblcutinner equal 4
|
||||
variable zblcutouter equal 4.8
|
||||
variable zblz equal 74
|
||||
|
||||
# Specify hybrid with SNAP, ZBL, and long-range Coulomb
|
||||
|
||||
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} snap table spline 10000 table spline 10000
|
||||
pair_style hybrid/overlay zbl 4 ${zblcutouter} snap table spline 10000 table spline 10000
|
||||
pair_style hybrid/overlay zbl 4 4.8 snap table spline 10000 table spline 10000
|
||||
pair_coeff 1 1 zbl ${zblz} ${zblz}
|
||||
pair_coeff 1 1 zbl 74 ${zblz}
|
||||
pair_coeff 1 1 zbl 74 74
|
||||
pair_coeff * * snap W_2940_2017_2.snapcoeff W W_2940_2017_2.snapparam W NULL
|
||||
Reading potential file W_2940_2017_2.snapcoeff with DATE: 2017-02-20
|
||||
SNAP Element = W, Radius 0.5, Weight 1
|
||||
Reading potential file W_2940_2017_2.snapparam with DATE: 2017-02-20
|
||||
SNAP keyword rcutfac 4.73442
|
||||
SNAP keyword twojmax 8
|
||||
SNAP keyword gamma 1
|
||||
SNAP keyword rfac0 0.99363
|
||||
SNAP keyword rmin0 0
|
||||
SNAP keyword diagonalstyle 3
|
||||
pair_coeff 2 2 table 1 He_He_JW2013.table HeHe
|
||||
Reading potential file He_He_JW2013.table with DATE: 2017-02-20
|
||||
WARNING: 1 of 4999 force values in table are inconsistent with -dE/dr.
|
||||
Should only be flagged at inflection points (../pair_table.cpp:476)
|
||||
pair_coeff 1 2 table 2 W_He_JW2013.table WHe
|
||||
Reading potential file W_He_JW2013.table with DATE: 2017-02-20
|
||||
WARNING: 3 of 325 force values in table are inconsistent with -dE/dr.
|
||||
Should only be flagged at inflection points (../pair_table.cpp:476)
|
||||
#Hybrid/overlay will take all pair styles and add their contributions equally, order of pair_coeff doesnt matter here
|
||||
#This is not the case for pair_style hybrid ... where only one pair_coeff is read for each type combination, order matters here.
|
||||
|
||||
|
||||
# Setup output
|
||||
|
||||
thermo 10
|
||||
thermo_modify norm yes
|
||||
|
||||
# Set up NVE run
|
||||
|
||||
timestep 0.5e-3
|
||||
neighbor 1.0 bin
|
||||
neigh_modify once no every 1 delay 0 check yes
|
||||
|
||||
# Run MD
|
||||
|
||||
velocity all create 300.0 4928459
|
||||
fix 1 all nve
|
||||
run ${nsteps}
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 5.8
|
||||
ghost atom cutoff = 5.8
|
||||
binsize = 2.9, bins = 5 5 5
|
||||
6 neighbor lists, perpetual/occasional/extra = 6 0 0
|
||||
(1) pair zbl, perpetual, half/full from (2)
|
||||
attributes: half, newton on
|
||||
pair build: halffull/newton/skip
|
||||
stencil: none
|
||||
bin: none
|
||||
(2) pair snap, perpetual, skip from (6)
|
||||
attributes: full, newton on
|
||||
pair build: skip
|
||||
stencil: none
|
||||
bin: none
|
||||
(3) pair table, perpetual, skip from (5)
|
||||
attributes: half, newton on
|
||||
pair build: skip
|
||||
stencil: none
|
||||
bin: none
|
||||
(4) pair table, perpetual, skip from (5)
|
||||
attributes: half, newton on
|
||||
pair build: skip
|
||||
stencil: none
|
||||
bin: none
|
||||
(5) neighbor class addition, perpetual, half/full from (6)
|
||||
attributes: half, newton on
|
||||
pair build: halffull/newton
|
||||
stencil: none
|
||||
bin: none
|
||||
(6) neighbor class addition, perpetual
|
||||
attributes: full, newton on
|
||||
pair build: full/bin/atomonly
|
||||
stencil: full/bin/3d
|
||||
bin: standard
|
||||
Memory usage per processor = 7.6729 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 300 -10.438105 0 -10.39963 -5445.2808
|
||||
10 290.48923 -10.436885 0 -10.399629 -5646.4813
|
||||
20 271.18868 -10.434409 0 -10.399629 -5654.4646
|
||||
30 246.2601 -10.431212 0 -10.399629 -5281.8873
|
||||
40 218.69918 -10.427677 0 -10.399629 -4343.3636
|
||||
50 189.12519 -10.423885 0 -10.399629 -2903.1138
|
||||
60 155.55701 -10.419579 0 -10.399629 -1402.2278
|
||||
70 118.83581 -10.414869 0 -10.399629 -146.36141
|
||||
80 85.903126 -10.410645 0 -10.399628 857.74986
|
||||
90 65.223651 -10.407993 0 -10.399628 1494.2746
|
||||
100 59.833542 -10.407302 0 -10.399628 1938.9164
|
||||
Loop time of 11.0736 on 1 procs for 100 steps with 128 atoms
|
||||
|
||||
Performance: 0.390 ns/day, 61.520 hours/ns, 9.030 timesteps/s
|
||||
99.9% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 11.072 | 11.072 | 11.072 | 0.0 | 99.99
|
||||
Neigh | 0.00041604 | 0.00041604 | 0.00041604 | 0.0 | 0.00
|
||||
Comm | 0.00046253 | 0.00046253 | 0.00046253 | 0.0 | 0.00
|
||||
Output | 0.0001657 | 0.0001657 | 0.0001657 | 0.0 | 0.00
|
||||
Modify | 0.0002265 | 0.0002265 | 0.0002265 | 0.0 | 0.00
|
||||
Other | | 0.0003119 | | | 0.00
|
||||
|
||||
Nlocal: 128 ave 128 max 128 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 727 ave 727 max 727 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 3425 ave 3425 max 3425 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
FullNghs: 6850 ave 6850 max 6850 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 6850
|
||||
Ave neighs/atom = 53.5156
|
||||
Neighbor list builds = 1
|
||||
Dangerous builds = 0
|
||||
|
||||
Total wall time: 0:00:11
|
||||
179
examples/snap/log.21Feb17.snap.hybrid.WSNAP.HePair.g++.4
Normal file
179
examples/snap/log.21Feb17.snap.hybrid.WSNAP.HePair.g++.4
Normal file
@ -0,0 +1,179 @@
|
||||
LAMMPS (13 Feb 2017)
|
||||
# Demonstrate SNAP Ta potential
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
variable nsteps index 100
|
||||
variable nrep equal 4
|
||||
variable a equal 3.1803
|
||||
units metal
|
||||
|
||||
# generate the box and atom positions using a BCC lattice
|
||||
|
||||
variable nx equal ${nrep}
|
||||
variable nx equal 4
|
||||
variable ny equal ${nrep}
|
||||
variable ny equal 4
|
||||
variable nz equal ${nrep}
|
||||
variable nz equal 4
|
||||
|
||||
boundary p p p
|
||||
|
||||
lattice bcc $a
|
||||
lattice bcc 3.1803
|
||||
Lattice spacing in x,y,z = 3.1803 3.1803 3.1803
|
||||
region box block 0 ${nx} 0 ${ny} 0 ${nz}
|
||||
region box block 0 4 0 ${ny} 0 ${nz}
|
||||
region box block 0 4 0 4 0 ${nz}
|
||||
region box block 0 4 0 4 0 4
|
||||
create_box 2 box
|
||||
Created orthogonal box = (0 0 0) to (12.7212 12.7212 12.7212)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 128 atoms
|
||||
mass 1 183.84
|
||||
mass 2 4.0026
|
||||
|
||||
set group all type/fraction 2 0.05 3590153 # Change 5% of W to He
|
||||
5 settings made for type/fraction
|
||||
group tungsten type 1
|
||||
123 atoms in group tungsten
|
||||
group helium type 2
|
||||
5 atoms in group helium
|
||||
# choose potential
|
||||
|
||||
include W.SNAP_HePair.pot
|
||||
# DATE: 2017-02-20 CONTRIBUTOR: Mitchell Wood mitwood@sandia.gov CITATION: Wood, M. A. and Thompson, A. P. to appear in arxiv Feb2017, W-He and He-He from Juslin, N. and Wirth, B. D. Journal of Nuclear Materials, 423, (2013) p61-63
|
||||
#
|
||||
# Definition of SNAP+ZBL+Tabulated potential.
|
||||
variable zblcutinner equal 4
|
||||
variable zblcutouter equal 4.8
|
||||
variable zblz equal 74
|
||||
|
||||
# Specify hybrid with SNAP, ZBL, and long-range Coulomb
|
||||
|
||||
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} snap table spline 10000 table spline 10000
|
||||
pair_style hybrid/overlay zbl 4 ${zblcutouter} snap table spline 10000 table spline 10000
|
||||
pair_style hybrid/overlay zbl 4 4.8 snap table spline 10000 table spline 10000
|
||||
pair_coeff 1 1 zbl ${zblz} ${zblz}
|
||||
pair_coeff 1 1 zbl 74 ${zblz}
|
||||
pair_coeff 1 1 zbl 74 74
|
||||
pair_coeff * * snap W_2940_2017_2.snapcoeff W W_2940_2017_2.snapparam W NULL
|
||||
Reading potential file W_2940_2017_2.snapcoeff with DATE: 2017-02-20
|
||||
SNAP Element = W, Radius 0.5, Weight 1
|
||||
Reading potential file W_2940_2017_2.snapparam with DATE: 2017-02-20
|
||||
SNAP keyword rcutfac 4.73442
|
||||
SNAP keyword twojmax 8
|
||||
SNAP keyword gamma 1
|
||||
SNAP keyword rfac0 0.99363
|
||||
SNAP keyword rmin0 0
|
||||
SNAP keyword diagonalstyle 3
|
||||
pair_coeff 2 2 table 1 He_He_JW2013.table HeHe
|
||||
Reading potential file He_He_JW2013.table with DATE: 2017-02-20
|
||||
WARNING: 1 of 4999 force values in table are inconsistent with -dE/dr.
|
||||
Should only be flagged at inflection points (../pair_table.cpp:476)
|
||||
pair_coeff 1 2 table 2 W_He_JW2013.table WHe
|
||||
Reading potential file W_He_JW2013.table with DATE: 2017-02-20
|
||||
WARNING: 3 of 325 force values in table are inconsistent with -dE/dr.
|
||||
Should only be flagged at inflection points (../pair_table.cpp:476)
|
||||
#Hybrid/overlay will take all pair styles and add their contributions equally, order of pair_coeff doesnt matter here
|
||||
#This is not the case for pair_style hybrid ... where only one pair_coeff is read for each type combination, order matters here.
|
||||
|
||||
|
||||
# Setup output
|
||||
|
||||
thermo 10
|
||||
thermo_modify norm yes
|
||||
|
||||
# Set up NVE run
|
||||
|
||||
timestep 0.5e-3
|
||||
neighbor 1.0 bin
|
||||
neigh_modify once no every 1 delay 0 check yes
|
||||
|
||||
# Run MD
|
||||
|
||||
velocity all create 300.0 4928459
|
||||
fix 1 all nve
|
||||
run ${nsteps}
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 5.8
|
||||
ghost atom cutoff = 5.8
|
||||
binsize = 2.9, bins = 5 5 5
|
||||
6 neighbor lists, perpetual/occasional/extra = 6 0 0
|
||||
(1) pair zbl, perpetual, half/full from (2)
|
||||
attributes: half, newton on
|
||||
pair build: halffull/newton/skip
|
||||
stencil: none
|
||||
bin: none
|
||||
(2) pair snap, perpetual, skip from (6)
|
||||
attributes: full, newton on
|
||||
pair build: skip
|
||||
stencil: none
|
||||
bin: none
|
||||
(3) pair table, perpetual, skip from (5)
|
||||
attributes: half, newton on
|
||||
pair build: skip
|
||||
stencil: none
|
||||
bin: none
|
||||
(4) pair table, perpetual, skip from (5)
|
||||
attributes: half, newton on
|
||||
pair build: skip
|
||||
stencil: none
|
||||
bin: none
|
||||
(5) neighbor class addition, perpetual, half/full from (6)
|
||||
attributes: half, newton on
|
||||
pair build: halffull/newton
|
||||
stencil: none
|
||||
bin: none
|
||||
(6) neighbor class addition, perpetual
|
||||
attributes: full, newton on
|
||||
pair build: full/bin/atomonly
|
||||
stencil: full/bin/3d
|
||||
bin: standard
|
||||
Memory usage per processor = 7.65426 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 300 -10.438105 0 -10.39963 -5445.2808
|
||||
10 292.13979 -10.437097 0 -10.39963 -5516.3963
|
||||
20 272.55728 -10.434585 0 -10.399629 -5460.4268
|
||||
30 245.06559 -10.431059 0 -10.399629 -5016.6351
|
||||
40 212.79459 -10.42692 0 -10.399629 -3924.2175
|
||||
50 178.03903 -10.422462 0 -10.399629 -2354.5485
|
||||
60 141.62155 -10.417791 0 -10.399628 -595.41345
|
||||
70 107.24843 -10.413383 0 -10.399628 1138.4107
|
||||
80 79.985938 -10.409886 0 -10.399628 2392.1106
|
||||
90 62.568933 -10.407652 0 -10.399628 3141.7027
|
||||
100 56.697933 -10.406899 0 -10.399628 3583.9538
|
||||
Loop time of 2.8757 on 4 procs for 100 steps with 128 atoms
|
||||
|
||||
Performance: 1.502 ns/day, 15.976 hours/ns, 34.774 timesteps/s
|
||||
99.9% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 2.7363 | 2.8122 | 2.8636 | 2.9 | 97.79
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.011014 | 0.062439 | 0.13842 | 19.3 | 2.17
|
||||
Output | 0.00023842 | 0.00025076 | 0.0002861 | 0.0 | 0.01
|
||||
Modify | 9.2506e-05 | 9.9301e-05 | 0.00010395 | 0.0 | 0.00
|
||||
Other | | 0.0006654 | | | 0.02
|
||||
|
||||
Nlocal: 32 ave 32 max 32 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 431 ave 431 max 431 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 856.25 ave 885 max 818 min
|
||||
Histogram: 1 0 0 0 1 0 0 0 1 1
|
||||
FullNghs: 1712.5 ave 1738 max 1658 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 2 1
|
||||
|
||||
Total # of neighbors = 6850
|
||||
Ave neighs/atom = 53.5156
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
|
||||
Total wall time: 0:00:02
|
||||
@ -1,5 +1,27 @@
|
||||
# Change Log
|
||||
|
||||
## [2.02.15](https://github.com/kokkos/kokkos/tree/2.02.15) (2017-02-10)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.07...2.02.15)
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- Containers: Adding block partitioning to StaticCrsGraph [\#625](https://github.com/kokkos/kokkos/issues/625)
|
||||
- Kokkos Make System can induce Errors on Cray Volta System [\#610](https://github.com/kokkos/kokkos/issues/610)
|
||||
- OpenMP: error out if KOKKOS\_HAVE\_OPENMP is defined but not \_OPENMP [\#605](https://github.com/kokkos/kokkos/issues/605)
|
||||
- CMake: fix standalone build with tests [\#604](https://github.com/kokkos/kokkos/issues/604)
|
||||
- Change README \(that GitHub shows when opening Kokkos project page\) to tell users how to submit PRs [\#597](https://github.com/kokkos/kokkos/issues/597)
|
||||
- Add correctness testing for all operators of Atomic View [\#420](https://github.com/kokkos/kokkos/issues/420)
|
||||
- Allow assignment of Views with compatible memory spaces [\#290](https://github.com/kokkos/kokkos/issues/290)
|
||||
- Build only one version of Kokkos library for tests [\#213](https://github.com/kokkos/kokkos/issues/213)
|
||||
- Clean out old KOKKOS\_HAVE\_CXX11 macros clauses [\#156](https://github.com/kokkos/kokkos/issues/156)
|
||||
- Harmonize Macro names [\#150](https://github.com/kokkos/kokkos/issues/150)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- Cray and PGI: Kokkos\_Parallel\_Reduce [\#634](https://github.com/kokkos/kokkos/issues/634)
|
||||
- Kokkos Make System can induce Errors on Cray Volta System [\#610](https://github.com/kokkos/kokkos/issues/610)
|
||||
- Normal\(\) function random number generator doesn't give the expected distribution [\#592](https://github.com/kokkos/kokkos/issues/592)
|
||||
|
||||
## [2.02.07](https://github.com/kokkos/kokkos/tree/2.02.07) (2016-12-16)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.01...2.02.07)
|
||||
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
|
||||
IF(COMMAND TRIBITS_PACKAGE_DECL)
|
||||
SET(KOKKOS_HAS_TRILINOS ON CACHE BOOL "")
|
||||
ELSE()
|
||||
@ -8,6 +7,7 @@ ENDIF()
|
||||
IF(NOT KOKKOS_HAS_TRILINOS)
|
||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11 FATAL_ERROR)
|
||||
INCLUDE(cmake/tribits.cmake)
|
||||
SET(CMAKE_CXX_STANDARD 11)
|
||||
ENDIF()
|
||||
|
||||
#
|
||||
|
||||
@ -7,6 +7,8 @@
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Kokkos is licensed under 3-clause BSD terms of use:
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
|
||||
@ -7,7 +7,7 @@ CXXFLAGS=$(CCFLAGS)
|
||||
#Options: OpenMP,Serial,Pthreads,Cuda
|
||||
KOKKOS_DEVICES ?= "OpenMP"
|
||||
#KOKKOS_DEVICES ?= "Pthreads"
|
||||
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,KNL,BDW,SKX
|
||||
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX
|
||||
KOKKOS_ARCH ?= ""
|
||||
#Options: yes,no
|
||||
KOKKOS_DEBUG ?= "no"
|
||||
@ -192,7 +192,8 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) |
|
||||
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc))
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l))
|
||||
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
|
||||
|
||||
#AMD based
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
|
||||
@ -206,7 +207,7 @@ KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_US
|
||||
# Decide what ISA level we are able to support
|
||||
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc ))
|
||||
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
|
||||
|
||||
#Incompatible flags?
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc ))
|
||||
@ -453,6 +454,17 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
|
||||
else
|
||||
# Assume that this is really a GNU compiler, or it could be XL on P9
|
||||
KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9
|
||||
KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
|
||||
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
|
||||
@ -5,6 +5,9 @@ Kokkos is designed to target complex node architectures with N-level memory
|
||||
hierarchies and multiple types of execution resources. It currently can use
|
||||
OpenMP, Pthreads and CUDA as backend programming models.
|
||||
|
||||
Kokkos is licensed under standard 3-clause BSD terms of use. For specifics
|
||||
see the LICENSE file contained in the repository or distribution.
|
||||
|
||||
The core developers of Kokkos are Carter Edwards and Christian Trott
|
||||
at the Computer Science Research Institute of the Sandia National
|
||||
Laboratories.
|
||||
@ -152,3 +155,11 @@ multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
|
||||
This will enforce proper UVM allocations, but can lead to errors if
|
||||
more than a single GPU is used by a single process.
|
||||
|
||||
===========================================================================
|
||||
====Contributing===========================================================
|
||||
===========================================================================
|
||||
|
||||
Contributions to Kokkos are welcome. To contribute, please open an issue
|
||||
where a feature request or bug can be discussed. Then issue a pull request
|
||||
with your contribution. Pull requests must be issued against the develop branch.
|
||||
|
||||
|
||||
@ -1014,7 +1014,7 @@ namespace Kokkos {
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(KOKKOS_HAVE_CUDA) && defined(__CUDACC__)
|
||||
#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
|
||||
|
||||
template<>
|
||||
class Random_XorShift1024<Kokkos::Cuda> {
|
||||
|
||||
@ -49,7 +49,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <TestRandom.hpp>
|
||||
#include <TestSort.hpp>
|
||||
@ -106,5 +106,5 @@ CUDA_SORT_UNSIGNED(171)
|
||||
#undef CUDA_SORT_UNSIGNED
|
||||
}
|
||||
|
||||
#endif /* #ifdef KOKKOS_HAVE_CUDA */
|
||||
#endif /* #ifdef KOKKOS_ENABLE_CUDA */
|
||||
|
||||
|
||||
@ -52,7 +52,7 @@
|
||||
|
||||
namespace Test {
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#ifdef KOKKOS_ENABLE_OPENMP
|
||||
class openmp : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase()
|
||||
|
||||
@ -55,7 +55,7 @@
|
||||
|
||||
namespace Test {
|
||||
|
||||
#ifdef KOKKOS_HAVE_SERIAL
|
||||
#ifdef KOKKOS_ENABLE_SERIAL
|
||||
class serial : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase()
|
||||
@ -93,7 +93,7 @@ SERIAL_SORT_UNSIGNED(171)
|
||||
#undef SERIAL_RANDOM_XORSHIFT1024
|
||||
#undef SERIAL_SORT_UNSIGNED
|
||||
|
||||
#endif // KOKKOS_HAVE_SERIAL
|
||||
#endif // KOKKOS_ENABLE_SERIAL
|
||||
} // namespace Test
|
||||
|
||||
|
||||
|
||||
@ -55,7 +55,7 @@
|
||||
|
||||
namespace Test {
|
||||
|
||||
#ifdef KOKKOS_HAVE_PTHREAD
|
||||
#ifdef KOKKOS_ENABLE_PTHREAD
|
||||
class threads : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase()
|
||||
|
||||
@ -4,13 +4,31 @@ INCLUDE(CTest)
|
||||
cmake_policy(SET CMP0054 NEW)
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME})
|
||||
project(Kokkos)
|
||||
project(KokkosCMake)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED ${${PROJECT_NAME}_ENABLE_DEBUG}})
|
||||
MESSAGE(WARNING "The project name is: ${PROJECT_NAME}")
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP)
|
||||
SET(${PROJECT_NAME}_ENABLE_OpenMP OFF)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG)
|
||||
SET(${PROJECT_NAME}_ENABLE_DEBUG OFF)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_CXX11)
|
||||
SET(${PROJECT_NAME}_ENABLE_CXX11 ON)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_TESTS)
|
||||
SET(${PROJECT_NAME}_ENABLE_TESTS OFF)
|
||||
ENDIF()
|
||||
|
||||
IF(NOT DEFINED TPL_ENABLE_Pthread)
|
||||
SET(TPL_ENABLE_Pthread OFF)
|
||||
ENDIF()
|
||||
|
||||
FUNCTION(ASSERT_DEFINED VARS)
|
||||
FOREACH(VAR ${VARS})
|
||||
IF(NOT DEFINED ${VAR})
|
||||
@ -70,9 +88,11 @@ ENDMACRO()
|
||||
|
||||
|
||||
MACRO(TRIBITS_ADD_TEST_DIRECTORIES)
|
||||
IF(${${PROJECT_NAME}_ENABLE_TESTS})
|
||||
FOREACH(TEST_DIR ${ARGN})
|
||||
ADD_SUBDIRECTORY(${TEST_DIR})
|
||||
ENDFOREACH()
|
||||
ENDIF()
|
||||
ENDMACRO()
|
||||
|
||||
MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES)
|
||||
@ -264,11 +284,11 @@ FUNCTION(TRIBITS_ADD_EXECUTABLE EXE_NAME)
|
||||
SET(EXE_BINARY_NAME ${PACKAGE_NAME}_${EXE_BINARY_NAME})
|
||||
ENDIF()
|
||||
|
||||
IF (PARSE_TESTONLY)
|
||||
SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL")
|
||||
ELSE()
|
||||
SET(EXCLUDE_FROM_ALL_KEYWORD)
|
||||
ENDIF()
|
||||
# IF (PARSE_TESTONLY)
|
||||
# SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL")
|
||||
# ELSE()
|
||||
# SET(EXCLUDE_FROM_ALL_KEYWORD)
|
||||
# ENDIF()
|
||||
ADD_EXECUTABLE(${EXE_BINARY_NAME} ${EXCLUDE_FROM_ALL_KEYWORD} ${EXE_SOURCES})
|
||||
|
||||
TARGET_LINK_AND_INCLUDE_LIBRARIES(${EXE_BINARY_NAME} ${LINK_LIBS})
|
||||
@ -470,9 +490,8 @@ ENDMACRO(TRIBITS_SUBPACKAGE_POSTPROCESS)
|
||||
|
||||
MACRO(TRIBITS_PACKAGE_DECL NAME)
|
||||
|
||||
PROJECT(${NAME})
|
||||
STRING(TOUPPER ${PROJECT_NAME} PROJECT_NAME_UC)
|
||||
SET(PACKAGE_NAME ${PROJECT_NAME})
|
||||
SET(PACKAGE_NAME ${NAME})
|
||||
SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC)
|
||||
|
||||
SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps")
|
||||
@ -489,7 +508,7 @@ MACRO(TRIBITS_PROCESS_SUBPACKAGES)
|
||||
FOREACH(SUBPACKAGE ${SUBPACKAGES})
|
||||
GET_FILENAME_COMPONENT(SUBPACKAGE_CMAKE ${SUBPACKAGE} DIRECTORY)
|
||||
GET_FILENAME_COMPONENT(SUBPACKAGE_DIR ${SUBPACKAGE_CMAKE} DIRECTORY)
|
||||
ADD_SUBDIRECTORY(${SUBPACKAGE_DIR})
|
||||
ADD_SUBDIRECTORY(${CMAKE_BINARY_DIR}/../${SUBPACKAGE_DIR})
|
||||
ENDFOREACH()
|
||||
ENDMACRO(TRIBITS_PROCESS_SUBPACKAGES)
|
||||
|
||||
|
||||
@ -4,3 +4,4 @@ tag: 2.01.10 date: 09:27:2016 master: e4119325 develop: e6cda11e
|
||||
tag: 2.02.00 date: 10:30:2016 master: 6c90a581 develop: ca3dd56e
|
||||
tag: 2.02.01 date: 11:01:2016 master: 9c698c86 develop: b0072304
|
||||
tag: 2.02.07 date: 12:16:2016 master: 4b4cc4ba develop: 382c0966
|
||||
tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6
|
||||
|
||||
@ -10,12 +10,18 @@ set -o pipefail
|
||||
|
||||
MACHINE=""
|
||||
HOSTNAME=$(hostname)
|
||||
PROCESSOR=`uname -p`
|
||||
|
||||
if [[ "$HOSTNAME" =~ (white|ride).* ]]; then
|
||||
MACHINE=white
|
||||
elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then
|
||||
MACHINE=bowman
|
||||
elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
|
||||
if [[ "$PROCESSOR" = "aarch64" ]]; then
|
||||
MACHINE=sullivan
|
||||
else
|
||||
MACHINE=shepard
|
||||
fi
|
||||
elif [[ "$HOSTNAME" =~ apollo ]]; then
|
||||
MACHINE=apollo
|
||||
elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
|
||||
@ -27,6 +33,7 @@ fi
|
||||
|
||||
GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
|
||||
IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
ARM_GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
|
||||
CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
|
||||
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
|
||||
@ -200,6 +207,23 @@ elif [ "$MACHINE" = "bowman" ]; then
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=KNL"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
|
||||
elif [ "$MACHINE" = "sullivan" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=96
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS")
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=ARMv8-ThunderX"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
|
||||
elif [ "$MACHINE" = "shepard" ]; then
|
||||
@ -298,6 +322,7 @@ echo "--debug: Run tests in debug. Defaults to False"
|
||||
echo "--test-script: Test this script, not Kokkos"
|
||||
echo "--skip-hwloc: Do not do hwloc tests"
|
||||
echo "--num=N: Number of jobs to run in parallel"
|
||||
echo "--spot-check: Minimal test set to issue pull request"
|
||||
echo "--dry-run: Just print what would be executed"
|
||||
echo "--build-only: Just do builds, don't run anything"
|
||||
echo "--opt-flag=FLAG: Optimization flag (default: -O3)"
|
||||
|
||||
66
lib/kokkos/config/test_kokkos_master_develop_promotion.sh
Executable file
66
lib/kokkos/config/test_kokkos_master_develop_promotion.sh
Executable file
@ -0,0 +1,66 @@
|
||||
#!/bin/bash
#
# Configure and test one Kokkos configuration in a fresh, time-stamped
# build directory.
#
# Positional arguments (echoed below so they show up in the log):
#   $1  build-dir            prefix of the build directory; "-<timestamp>" is appended
#   $2  backend              forwarded as --with-devices (also given to "module load")
#   $3  module               only echoed
#   $4  compiler             forwarded as --compiler
#   $5  cxxflags             forwarded as --cxxflags
#   $6  architecture         forwarded as --arch
#   $7  debug                the literal "debug" adds --debug
#   $8  kokkos-options       forwarded as --with-options
#   $9  kokkos-cuda-options  forwarded as --with-cuda-options; the literal "yes"
#                            additionally adds --with-hwloc=${HWLOC_ROOT}
#
# The exit status is that of "make test".

. /etc/profile.d/modules.sh

echo "build-dir $1"
echo "backend $2"
echo "module $3"
echo "compiler $4"
echo "cxxflags $5"
echo "architecrure $6"
echo "debug $7"
echo "kokkos-options $8"
echo "kokkos-cuda-options $9"
# NOTE(review): hwloc is reported and tested through $9, the same slot as the
# CUDA options; a tenth argument (${10}) may have been intended - confirm with
# the callers before changing the calling convention.
echo "hwloc $9"

NOW=`date "+%Y%m%d%H%M%S"`
BASEDIR="$1-$NOW"

# Stop early if the build directory cannot be created or entered; otherwise
# the relative ../generate_makefile.sh below would run from the wrong place.
if ! mkdir "$BASEDIR" || ! cd "$BASEDIR"; then
  echo "cannot create or enter build directory $BASEDIR" >&2
  return 1 2>/dev/null || exit 1
fi

# NOTE(review): this loads $2 (echoed above as the backend) although $3 is the
# argument labelled "module" - confirm which one is intended.
module load $2

# Arguments common to every configuration. Quoting keeps values that contain
# spaces (typically the cxxflags) together as a single option.
GEN_ARGS=(
  --with-devices="$2"
  --compiler="$4"
  --cxxflags="$5"
  --arch="$6"
  --with-options="$8"
  --with-cuda-options="$9"
)

# Only request a debug build when it was asked for.
if [ "$7" = "debug" ]; then
  GEN_ARGS+=( --debug )
fi

# hwloc support is optional.
if [ "$9" = "yes" ]; then
  GEN_ARGS+=( --with-hwloc="${HWLOC_ROOT}" )
fi

../generate_makefile.sh "${GEN_ARGS[@]}"

make test
rc=$?

# "return" only works when the script is sourced; fall back to "exit" when it
# is executed directly so the status of "make test" is propagated either way.
return $rc 2>/dev/null || exit $rc
|
||||
@ -27,13 +27,13 @@ cd ${TRILINOS_UPDATED_PATH}
|
||||
echo ""
|
||||
echo ""
|
||||
echo "Trilinos State:"
|
||||
git log --pretty=oneline --since=2.days
|
||||
SHA=`git log --pretty=oneline --since=2.days | head -n 2 | tail -n 1 | awk '{print $1}'`
|
||||
git log --pretty=oneline --since=7.days
|
||||
SHA=`git log --pretty=oneline --since=7.days | head -n 2 | tail -n 1 | awk '{print $1}'`
|
||||
cd ..
|
||||
|
||||
cd ${TRILINOS_PRISTINE_PATH}
|
||||
git status
|
||||
git log --pretty=oneline --since=2.days
|
||||
git log --pretty=oneline --since=7.days
|
||||
echo "Checkout develop"
|
||||
git checkout develop
|
||||
echo "Pull"
|
||||
@ -46,5 +46,5 @@ cd ${TRILINOS_PRISTINE_PATH}
|
||||
echo ""
|
||||
echo ""
|
||||
echo "Trilinos Pristine State:"
|
||||
git log --pretty=oneline --since=2.days
|
||||
git log --pretty=oneline --since=7.days
|
||||
cd ..
|
||||
|
||||
@ -52,7 +52,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <TestDynRankView.hpp>
|
||||
|
||||
@ -106,4 +106,4 @@ TEST_F( cuda, unordered_map_performance_far)
|
||||
|
||||
}
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
|
||||
@ -164,12 +164,10 @@ struct UnorderedMapTest
|
||||
|
||||
};
|
||||
|
||||
//#define KOKKOS_COLLECT_UNORDERED_MAP_METRICS
|
||||
|
||||
template <typename Device, bool Near>
|
||||
void run_performance_tests(std::string const & base_file_name)
|
||||
{
|
||||
#if defined(KOKKOS_COLLECT_UNORDERED_MAP_METRICS)
|
||||
#if 0
|
||||
std::string metrics_file_name = base_file_name + std::string("-metrics.csv");
|
||||
std::string length_file_name = base_file_name + std::string("-length.csv");
|
||||
std::string distance_file_name = base_file_name + std::string("-distance.csv");
|
||||
|
||||
@ -586,13 +586,13 @@ private:
|
||||
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
|
||||
|
||||
// rank of the calling operator - included as first argument in ARG
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
|
||||
Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds ARG ;
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
|
||||
|
||||
#endif
|
||||
@ -609,9 +609,9 @@ public:
|
||||
reference_type operator()() const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) )
|
||||
#endif
|
||||
return implementation_map().reference();
|
||||
//return m_map.reference(0,0,0,0,0,0,0);
|
||||
@ -650,9 +650,9 @@ public:
|
||||
operator()(const iType & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
return m_map.reference(i0);
|
||||
}
|
||||
@ -663,9 +663,9 @@ public:
|
||||
operator()(const iType & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
return m_map.reference(i0,0,0,0,0,0,0);
|
||||
}
|
||||
@ -677,9 +677,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1);
|
||||
}
|
||||
@ -690,9 +690,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,0,0,0,0,0);
|
||||
}
|
||||
@ -704,9 +704,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2);
|
||||
}
|
||||
@ -717,9 +717,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,0,0,0,0);
|
||||
}
|
||||
@ -731,9 +731,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3);
|
||||
}
|
||||
@ -744,9 +744,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,0,0,0);
|
||||
}
|
||||
@ -758,9 +758,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4);
|
||||
}
|
||||
@ -771,9 +771,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4,0,0);
|
||||
}
|
||||
@ -785,9 +785,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5);
|
||||
}
|
||||
@ -798,9 +798,9 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,0);
|
||||
}
|
||||
@ -812,14 +812,14 @@ public:
|
||||
operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) )
|
||||
#endif
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
|
||||
}
|
||||
|
||||
#undef KOKKOS_VIEW_OPERATOR_VERIFY
|
||||
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
|
||||
|
||||
//----------------------------------------
|
||||
// Standard constructor, destructor, and assignment operators...
|
||||
@ -960,7 +960,7 @@ public:
|
||||
alloc_prop prop( arg_prop );
|
||||
|
||||
//------------------------------------------------------------
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
// If allocating in CudaUVMSpace must fence before and after
|
||||
// the allocation to protect against possible concurrent access
|
||||
// on the CPU and the GPU.
|
||||
@ -976,7 +976,7 @@ public:
|
||||
record = m_map.allocate_shared( prop , Impl::DynRankDimTraits<typename traits::specialize>::createLayout(arg_layout) );
|
||||
|
||||
//------------------------------------------------------------
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
|
||||
traits::device_type::memory_space::execution_space::fence();
|
||||
}
|
||||
|
||||
@ -51,6 +51,80 @@
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
namespace Impl {
|
||||
template<class RowOffsetsType, class RowBlockOffsetsType>
|
||||
struct StaticCrsGraphBalancerFunctor {
|
||||
typedef typename RowOffsetsType::non_const_value_type int_type;
|
||||
RowOffsetsType row_offsets;
|
||||
RowBlockOffsetsType row_block_offsets;
|
||||
|
||||
int_type cost_per_row, num_blocks;
|
||||
|
||||
StaticCrsGraphBalancerFunctor(RowOffsetsType row_offsets_,
|
||||
RowBlockOffsetsType row_block_offsets_,
|
||||
int_type cost_per_row_, int_type num_blocks_):
|
||||
row_offsets(row_offsets_),
|
||||
row_block_offsets(row_block_offsets_),
|
||||
cost_per_row(cost_per_row_),
|
||||
num_blocks(num_blocks_){}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int_type& iRow) const {
|
||||
const int_type num_rows = row_offsets.dimension_0()-1;
|
||||
const int_type num_entries = row_offsets(num_rows);
|
||||
const int_type total_cost = num_entries + num_rows*cost_per_row;
|
||||
|
||||
const double cost_per_workset = 1.0*total_cost/num_blocks;
|
||||
|
||||
const int_type row_cost = row_offsets(iRow+1)-row_offsets(iRow) + cost_per_row;
|
||||
|
||||
int_type count = row_offsets(iRow+1) + cost_per_row*iRow;
|
||||
|
||||
if(iRow == num_rows-1) row_block_offsets(num_blocks) = num_rows;
|
||||
|
||||
if(true) {
|
||||
int_type current_block = (count-row_cost-cost_per_row)/cost_per_workset;
|
||||
int_type end_block = count/cost_per_workset;
|
||||
|
||||
// Handle some corner cases for the last two blocks.
|
||||
if(current_block >= num_blocks-2) {
|
||||
if((current_block == num_blocks-2) && (count >= (current_block + 1) * cost_per_workset)) {
|
||||
int_type row = iRow;
|
||||
int_type cc = count-row_cost-cost_per_row;
|
||||
int_type block = cc/cost_per_workset;
|
||||
while((block>0) && (block==current_block)) {
|
||||
cc = row_offsets(row)+row*cost_per_row;
|
||||
block = cc/cost_per_workset;
|
||||
row--;
|
||||
}
|
||||
if((count-cc-row_cost-cost_per_row) < num_entries-row_offsets(iRow+1)) {
|
||||
row_block_offsets(current_block+1) = iRow+1;
|
||||
} else {
|
||||
row_block_offsets(current_block+1) = iRow;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if((count >= (current_block + 1) * cost_per_workset) ||
|
||||
(iRow+2 == row_offsets.dimension_0())) {
|
||||
if(end_block>current_block+1) {
|
||||
int_type num_block = end_block-current_block;
|
||||
row_block_offsets(current_block+1) = iRow;
|
||||
for(int_type block = current_block+2; block <= end_block; block++)
|
||||
if((block<current_block+2+(num_block-1)/2))
|
||||
row_block_offsets(block) = iRow;
|
||||
else
|
||||
row_block_offsets(block) = iRow+1;
|
||||
} else {
|
||||
row_block_offsets(current_block+1) = iRow+1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// \class StaticCrsGraph
|
||||
/// \brief Compressed row storage array.
|
||||
///
|
||||
@ -100,19 +174,23 @@ public:
|
||||
typedef StaticCrsGraph< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror;
|
||||
typedef View< const size_type* , array_layout, device_type > row_map_type;
|
||||
typedef View< DataType* , array_layout, device_type > entries_type;
|
||||
typedef View< const size_type* , array_layout, device_type > row_block_type;
|
||||
|
||||
entries_type entries;
|
||||
row_map_type row_map;
|
||||
row_block_type row_block_offsets;
|
||||
|
||||
//! Construct an empty view.
|
||||
StaticCrsGraph () : entries(), row_map() {}
|
||||
StaticCrsGraph () : entries(), row_map(), row_block_offsets() {}
|
||||
|
||||
//! Copy constructor (shallow copy).
|
||||
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map)
|
||||
StaticCrsGraph (const StaticCrsGraph& rhs) : entries (rhs.entries), row_map (rhs.row_map),
|
||||
row_block_offsets(rhs.row_block_offsets)
|
||||
{}
|
||||
|
||||
template<class EntriesType, class RowMapType>
|
||||
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_)
|
||||
StaticCrsGraph (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_),
|
||||
row_block_offsets()
|
||||
{}
|
||||
|
||||
/** \brief Assign to a view of the rhs array.
|
||||
@ -122,6 +200,7 @@ public:
|
||||
StaticCrsGraph& operator= (const StaticCrsGraph& rhs) {
|
||||
entries = rhs.entries;
|
||||
row_map = rhs.row_map;
|
||||
row_block_offsets = rhs.row_block_offsets;
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -130,12 +209,30 @@ public:
|
||||
*/
|
||||
~StaticCrsGraph() {}
|
||||
|
||||
/** \brief Return number of rows in the graph
|
||||
*/
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_type numRows() const {
|
||||
return (row_map.dimension_0 () != 0) ?
|
||||
row_map.dimension_0 () - static_cast<size_type> (1) :
|
||||
static_cast<size_type> (0);
|
||||
}
|
||||
|
||||
/** \brief Create a row partitioning into a given number of blocks
|
||||
* balancing non-zeros + a fixed cost per row.
|
||||
*/
|
||||
void create_block_partitioning(size_type num_blocks, size_type fix_cost_per_row = 4) {
|
||||
View< size_type* , array_layout, device_type >
|
||||
block_offsets("StatisCrsGraph::load_balance_offsets",num_blocks+1);
|
||||
|
||||
Impl::StaticCrsGraphBalancerFunctor<row_map_type,View< size_type* , array_layout, device_type > >
|
||||
partitioner(row_map,block_offsets,fix_cost_per_row,num_blocks);
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0,numRows()),partitioner);
|
||||
Kokkos::fence();
|
||||
|
||||
row_block_offsets = block_offsets;
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -72,7 +72,7 @@ private:
|
||||
|
||||
|
||||
public:
|
||||
#ifdef KOKKOS_CUDA_USE_UVM
|
||||
#ifdef KOKKOS_ENABLE_CUDA_UVM
|
||||
KOKKOS_INLINE_FUNCTION Scalar& operator() (int i) const {return DV::h_view(i);};
|
||||
KOKKOS_INLINE_FUNCTION Scalar& operator[] (int i) const {return DV::h_view(i);};
|
||||
#else
|
||||
|
||||
@ -133,11 +133,11 @@ uint32_t MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed )
|
||||
defined( __GNUG__ ) /* GNU C++ */ || \
|
||||
defined( __clang__ )
|
||||
|
||||
#define KOKKOS_MAY_ALIAS __attribute__((__may_alias__))
|
||||
#define KOKKOS_IMPL_MAY_ALIAS __attribute__((__may_alias__))
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_MAY_ALIAS
|
||||
#define KOKKOS_IMPL_MAY_ALIAS
|
||||
|
||||
#endif
|
||||
|
||||
@ -145,10 +145,10 @@ template <typename T>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
|
||||
{
|
||||
typedef uint64_t KOKKOS_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_MAY_ALIAS T8;
|
||||
typedef uint64_t KOKKOS_IMPL_MAY_ALIAS T64;
|
||||
typedef uint32_t KOKKOS_IMPL_MAY_ALIAS T32;
|
||||
typedef uint16_t KOKKOS_IMPL_MAY_ALIAS T16;
|
||||
typedef uint8_t KOKKOS_IMPL_MAY_ALIAS T8;
|
||||
|
||||
enum {
|
||||
NUM_8 = sizeof(T),
|
||||
@ -188,7 +188,7 @@ bool bitwise_equal(T const * const a_ptr, T const * const b_ptr)
|
||||
|
||||
|
||||
|
||||
#undef KOKKOS_MAY_ALIAS
|
||||
#undef KOKKOS_IMPL_MAY_ALIAS
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
|
||||
@ -69,15 +69,17 @@ create_mirror( const StaticCrsGraph<DataType,Arg1Type,Arg2Type,SizeType > & view
|
||||
|
||||
typename staticcrsgraph_type::HostMirror tmp ;
|
||||
typename staticcrsgraph_type::row_map_type::HostMirror tmp_row_map = create_mirror( view.row_map);
|
||||
typename staticcrsgraph_type::row_block_type::HostMirror tmp_row_block_offsets = create_mirror( view.row_block_offsets);
|
||||
|
||||
// Allocation to match:
|
||||
tmp.row_map = tmp_row_map ; // Assignment of 'const' from 'non-const'
|
||||
tmp.entries = create_mirror( view.entries );
|
||||
|
||||
tmp.row_block_offsets = tmp_row_block_offsets ; // Assignment of 'const' from 'non-const'
|
||||
|
||||
// Deep copy:
|
||||
deep_copy( tmp_row_map , view.row_map );
|
||||
deep_copy( tmp.entries , view.entries );
|
||||
deep_copy( tmp_row_block_offsets , view.row_block_offsets );
|
||||
|
||||
return tmp ;
|
||||
}
|
||||
|
||||
@ -69,7 +69,7 @@
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
namespace Test {
|
||||
|
||||
@ -96,6 +96,18 @@ TEST_F( cuda , staticcrsgraph )
|
||||
{
|
||||
TestStaticCrsGraph::run_test_graph< Kokkos::Cuda >();
|
||||
TestStaticCrsGraph::run_test_graph2< Kokkos::Cuda >();
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(1, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(3, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Cuda >(75, 100000);
|
||||
}
|
||||
|
||||
|
||||
@ -225,5 +237,5 @@ TEST_F(cuda, ErrorReporter)
|
||||
|
||||
}
|
||||
|
||||
#endif /* #ifdef KOKKOS_HAVE_CUDA */
|
||||
#endif /* #ifdef KOKKOS_ENABLE_CUDA */
|
||||
|
||||
|
||||
@ -1298,7 +1298,7 @@ public:
|
||||
// For CUDA the constant random access View does not return
|
||||
// an lvalue reference due to retrieving through texture cache
|
||||
// therefore not allowed to query the underlying pointer.
|
||||
#if defined(KOKKOS_HAVE_CUDA)
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
if ( ! std::is_same< typename device::execution_space , Kokkos::Cuda >::value )
|
||||
#endif
|
||||
{
|
||||
@ -1408,7 +1408,7 @@ public:
|
||||
ASSERT_EQ( ds5.dimension_4() , ds5plus.dimension_4() );
|
||||
ASSERT_EQ( ds5.dimension_5() , ds5plus.dimension_5() );
|
||||
|
||||
#if ! defined( KOKKOS_HAVE_CUDA ) || defined ( KOKKOS_USE_CUDA_UVM )
|
||||
#if ! defined( KOKKOS_ENABLE_CUDA ) || defined ( KOKKOS_ENABLE_CUDA_UVM )
|
||||
ASSERT_EQ( & ds5(1,1,1,1,0) - & ds5plus(1,1,1,1,0) , 0 );
|
||||
ASSERT_EQ( & ds5(1,1,1,1,0,0) - & ds5plus(1,1,1,1,0,0) , 0 ); // passing argument to rank beyond the view's rank is allowed iff it is a 0.
|
||||
#endif
|
||||
|
||||
@ -200,7 +200,7 @@ struct ErrorReporterDriverUseLambda : public ErrorReporterDriverBase<DeviceType>
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#ifdef KOKKOS_ENABLE_OPENMP
|
||||
struct ErrorReporterDriverNativeOpenMP : public ErrorReporterDriverBase<Kokkos::OpenMP>
|
||||
{
|
||||
typedef ErrorReporterDriverBase<Kokkos::OpenMP> driver_base;
|
||||
|
||||
@ -68,7 +68,7 @@
|
||||
|
||||
namespace Test {
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
#ifdef KOKKOS_ENABLE_OPENMP
|
||||
class openmp : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase()
|
||||
@ -109,6 +109,18 @@ TEST_F( openmp , staticcrsgraph )
|
||||
{
|
||||
TestStaticCrsGraph::run_test_graph< Kokkos::OpenMP >();
|
||||
TestStaticCrsGraph::run_test_graph2< Kokkos::OpenMP >();
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(1, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(3, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::OpenMP >(75, 100000);
|
||||
}
|
||||
|
||||
#define OPENMP_INSERT_TEST( name, num_nodes, num_inserts, num_duplicates, repeat, near ) \
|
||||
|
||||
@ -45,7 +45,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if ! defined(KOKKOS_HAVE_SERIAL)
|
||||
#if ! defined(KOKKOS_ENABLE_SERIAL)
|
||||
# error "It doesn't make sense to build this file unless the Kokkos::Serial device is enabled. If you see this message, it probably means that there is an error in Kokkos' CMake build infrastructure."
|
||||
#else
|
||||
|
||||
@ -91,6 +91,18 @@ TEST_F( serial , staticcrsgraph )
|
||||
{
|
||||
TestStaticCrsGraph::run_test_graph< Kokkos::Serial >();
|
||||
TestStaticCrsGraph::run_test_graph2< Kokkos::Serial >();
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(1, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(3, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 100000);
|
||||
}
|
||||
|
||||
TEST_F( serial, complex )
|
||||
@ -178,6 +190,6 @@ TEST_F(serial, ErrorReporter)
|
||||
|
||||
} // namespace Test
|
||||
|
||||
#endif // KOKKOS_HAVE_SERIAL
|
||||
#endif // KOKKOS_ENABLE_SERIAL
|
||||
|
||||
|
||||
|
||||
@ -144,6 +144,44 @@ void run_test_graph2()
|
||||
}
|
||||
}
|
||||
|
||||
template< class Space >
|
||||
void run_test_graph3(size_t B, size_t N)
|
||||
{
|
||||
srand(10310);
|
||||
|
||||
typedef Kokkos::StaticCrsGraph< int , Space > dView ;
|
||||
typedef typename dView::HostMirror hView ;
|
||||
|
||||
const unsigned LENGTH = 2000 ;
|
||||
|
||||
std::vector< size_t > sizes( LENGTH );
|
||||
|
||||
size_t total_length = 0 ;
|
||||
|
||||
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
||||
sizes[i] = rand()%1000;
|
||||
}
|
||||
|
||||
sizes[1] = N;
|
||||
sizes[1998] = N;
|
||||
|
||||
for ( size_t i = 0 ; i < LENGTH ; ++i ) {
|
||||
total_length += sizes[i];
|
||||
}
|
||||
|
||||
int C = 0;
|
||||
dView dx = Kokkos::create_staticcrsgraph<dView>( "test" , sizes );
|
||||
dx.create_block_partitioning(B,C);
|
||||
hView hx = Kokkos::create_mirror( dx );
|
||||
|
||||
for( size_t i = 0; i<B; i++) {
|
||||
size_t ne = 0;
|
||||
for(size_t j = hx.row_block_offsets(i); j<hx.row_block_offsets(i+1); j++)
|
||||
ne += hx.row_map(j+1)-hx.row_map(j)+C;
|
||||
|
||||
ASSERT_FALSE((ne>2*((hx.row_map(hx.numRows())+C*hx.numRows())/B))&&(hx.row_block_offsets(i+1)>hx.row_block_offsets(i)+1));
|
||||
}
|
||||
}
|
||||
} /* namespace TestStaticCrsGraph */
|
||||
|
||||
|
||||
|
||||
@ -45,7 +45,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
|
||||
#include <Kokkos_Bitset.hpp>
|
||||
#include <Kokkos_UnorderedMap.hpp>
|
||||
@ -106,6 +106,18 @@ TEST_F( threads , staticcrsgraph )
|
||||
{
|
||||
TestStaticCrsGraph::run_test_graph< Kokkos::Threads >();
|
||||
TestStaticCrsGraph::run_test_graph2< Kokkos::Threads >();
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(1, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(3, 100000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 0);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 1000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 10000);
|
||||
TestStaticCrsGraph::run_test_graph3< Kokkos::Threads >(75, 100000);
|
||||
}
|
||||
|
||||
/*TEST_F( threads, bitset )
|
||||
@ -190,5 +202,5 @@ TEST_F(threads, ErrorReporter)
|
||||
} // namespace Test
|
||||
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */
|
||||
|
||||
|
||||
@ -48,7 +48,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
@ -185,5 +185,5 @@ TEST_F( cuda, texture_double )
|
||||
|
||||
} // namespace Test
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
|
||||
|
||||
@ -51,8 +51,8 @@
|
||||
// macro, so I'm commenting out the macro to avoid compiler complaints
|
||||
// about an unused macro.
|
||||
|
||||
// #define KOKKOS_MACRO_IMPL_TO_STRING( X ) #X
|
||||
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_MACRO_IMPL_TO_STRING( X )
|
||||
// #define KOKKOS_IMPL_MACRO_TO_STRING( X ) #X
|
||||
// #define KOKKOS_MACRO_TO_STRING( X ) KOKKOS_IMPL_MACRO_TO_STRING( X )
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -45,17 +45,17 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
|
||||
typedef Kokkos::OpenMP TestHostDevice ;
|
||||
const char TestHostDeviceName[] = "Kokkos::OpenMP" ;
|
||||
|
||||
#elif defined( KOKKOS_HAVE_PTHREAD )
|
||||
#elif defined( KOKKOS_ENABLE_PTHREAD )
|
||||
|
||||
typedef Kokkos::Threads TestHostDevice ;
|
||||
const char TestHostDeviceName[] = "Kokkos::Threads" ;
|
||||
|
||||
#elif defined( KOKKOS_HAVE_SERIAL )
|
||||
#elif defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
typedef Kokkos::Serial TestHostDevice ;
|
||||
const char TestHostDeviceName[] = "Kokkos::Serial" ;
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <string>
|
||||
#include <Kokkos_Parallel.hpp>
|
||||
@ -112,7 +112,7 @@ CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type
|
||||
#if defined( __CUDACC__ )
|
||||
|
||||
/** \brief Access to constant memory on the device */
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
|
||||
__device__ __constant__
|
||||
extern unsigned long kokkos_impl_cuda_constant_memory_buffer[] ;
|
||||
@ -135,7 +135,7 @@ namespace Impl {
|
||||
}
|
||||
}
|
||||
__device__ __constant__
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
extern
|
||||
#endif
|
||||
Kokkos::Impl::CudaLockArraysStruct kokkos_impl_cuda_lock_arrays ;
|
||||
@ -245,7 +245,7 @@ struct CudaParallelLaunch< DriverType , true > {
|
||||
// Copy functor to constant memory on the device
|
||||
cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) );
|
||||
|
||||
#ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
Kokkos::Impl::CudaLockArraysStruct locks;
|
||||
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
|
||||
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
|
||||
@ -287,7 +287,7 @@ struct CudaParallelLaunch< DriverType , false > {
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
Kokkos::Impl::CudaLockArraysStruct locks;
|
||||
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
|
||||
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
|
||||
@ -314,5 +314,5 @@ struct CudaParallelLaunch< DriverType , false > {
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* defined( __CUDACC__ ) */
|
||||
#endif /* defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */
|
||||
|
||||
@ -50,7 +50,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Cuda.hpp>
|
||||
@ -910,5 +910,5 @@ void* cuda_resize_scratch_space(size_t bytes, bool force_shrink) {
|
||||
|
||||
}
|
||||
}
|
||||
#endif // KOKKOS_HAVE_CUDA
|
||||
#endif // KOKKOS_ENABLE_CUDA
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
|
||||
@ -176,7 +176,7 @@ public:
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif //KOKKOS_HAVE_CUDA
|
||||
#endif //KOKKOS_ENABLE_CUDA
|
||||
|
||||
#endif // #ifndef KOKKOS_CUDA_ALLOCATION_TRACKING_HPP
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
namespace Kokkos { namespace Impl {
|
||||
|
||||
@ -65,5 +65,5 @@ inline void cuda_internal_safe_call( cudaError e , const char * name, const char
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif //KOKKOS_HAVE_CUDA
|
||||
#endif //KOKKOS_ENABLE_CUDA
|
||||
#endif //KOKKOS_CUDA_ERROR_HPP
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Cuda/Kokkos_Cuda_Error.hpp>
|
||||
#include <Cuda/Kokkos_Cuda_Internal.hpp>
|
||||
@ -64,7 +64,7 @@
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
|
||||
__device__ __constant__
|
||||
unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ;
|
||||
@ -299,8 +299,8 @@ void CudaInternal::print_configuration( std::ostream & s ) const
|
||||
{
|
||||
const CudaInternalDevices & dev_info = CudaInternalDevices::singleton();
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
s << "macro KOKKOS_HAVE_CUDA : defined" << std::endl ;
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
s << "macro KOKKOS_ENABLE_CUDA : defined" << std::endl ;
|
||||
#endif
|
||||
#if defined( CUDA_VERSION )
|
||||
s << "macro CUDA_VERSION = " << CUDA_VERSION
|
||||
@ -500,7 +500,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
|
||||
Kokkos::Impl::throw_runtime_exception( msg.str() );
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_UVM
|
||||
#ifdef KOKKOS_ENABLE_CUDA_UVM
|
||||
if(!cuda_launch_blocking()) {
|
||||
std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl;
|
||||
std::cout << " without setting CUDA_LAUNCH_BLOCKING=1." << std::endl;
|
||||
@ -531,7 +531,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
|
||||
// Init the array for used for arbitrarily sized atomics
|
||||
Impl::init_lock_arrays_cuda_space();
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
Kokkos::Impl::CudaLockArraysStruct locks;
|
||||
locks.atomic = atomic_lock_array_cuda_space_ptr(false);
|
||||
locks.scratch = scratch_lock_array_cuda_space_ptr(false);
|
||||
@ -773,6 +773,6 @@ void Cuda::fence()
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif // KOKKOS_HAVE_CUDA
|
||||
#endif // KOKKOS_ENABLE_CUDA
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Cuda/Kokkos_Cuda_Error.hpp>
|
||||
|
||||
@ -197,6 +197,6 @@ struct CudaGetOptBlockSize<DriverType,false> {
|
||||
|
||||
}} // namespace Kokkos::Impl
|
||||
|
||||
#endif // KOKKOS_HAVE_CUDA
|
||||
#endif // KOKKOS_ENABLE_CUDA
|
||||
#endif /* #ifndef KOKKOS_CUDA_INTERNAL_HPP */
|
||||
|
||||
|
||||
@ -51,7 +51,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <utility>
|
||||
#include <Kokkos_Parallel.hpp>
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <utility>
|
||||
|
||||
@ -312,7 +312,7 @@ void cuda_intra_block_reduce_scan( const FunctorType & functor ,
|
||||
( rtid_intra & 16 ) ? 16 : 0 ))));
|
||||
|
||||
if ( ! ( rtid_intra + n < blockDim.y ) ) n = 0 ;
|
||||
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
|
||||
#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
|
||||
BLOCK_SCAN_STEP(tdata_intra,n,4) __syncthreads();//__threadfence_block();
|
||||
BLOCK_SCAN_STEP(tdata_intra,n,3) __syncthreads();//__threadfence_block();
|
||||
BLOCK_SCAN_STEP(tdata_intra,n,2) __syncthreads();//__threadfence_block();
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
#if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <impl/Kokkos_TaskQueue_impl.hpp>
|
||||
|
||||
@ -174,6 +174,6 @@ printf("cuda_task_queue_execute after\n");
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
|
||||
#include <Kokkos_Cuda.hpp>
|
||||
|
||||
@ -294,5 +294,5 @@ namespace Impl {
|
||||
|
||||
}
|
||||
|
||||
#endif // KOKKOS_HAVE_CUDA
|
||||
#endif // KOKKOS_ENABLE_CUDA
|
||||
#endif
|
||||
|
||||
@ -45,7 +45,7 @@
|
||||
#define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP
|
||||
|
||||
/* only compile this file if CUDA is enabled for Kokkos */
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
@ -144,7 +144,7 @@ struct CudaTextureFetch {
|
||||
{}
|
||||
};
|
||||
|
||||
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
|
||||
#if defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
|
||||
|
||||
template< typename ValueType , typename AliasType >
|
||||
struct CudaLDGFetch {
|
||||
@ -261,7 +261,7 @@ public:
|
||||
>::type
|
||||
>::type ;
|
||||
|
||||
#if defined( KOKKOS_CUDA_USE_LDG_INTRINSIC )
|
||||
#if defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
|
||||
using handle_type = Kokkos::Experimental::Impl::CudaLDGFetch< value_type , alias_type > ;
|
||||
#else
|
||||
using handle_type = Kokkos::Experimental::Impl::CudaTextureFetch< value_type , alias_type > ;
|
||||
@ -301,6 +301,6 @@ public:
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #ifndef KOKKOS_CUDA_VIEW_HPP */
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
#include "Kokkos_Macros.hpp"
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <cuda.h>
|
||||
|
||||
@ -82,6 +82,6 @@ void cuda_abort( const char * const message )
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
#endif /* #if defined(__CUDACC__) && defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined(__CUDACC__) && defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */
|
||||
|
||||
|
||||
@ -48,8 +48,8 @@
|
||||
#include <Kokkos_Parallel.hpp>
|
||||
#include <initializer_list>
|
||||
|
||||
#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
|
||||
#define KOKKOS_MDRANGE_IVDEP
|
||||
#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_ENABLE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__)
|
||||
#define KOKKOS_IMPL_MDRANGE_IVDEP
|
||||
#endif
|
||||
|
||||
namespace Kokkos { namespace Experimental {
|
||||
@ -350,7 +350,7 @@ struct MDForFunctor
|
||||
|
||||
if ( MDRange::inner_direction == MDRange::Right ) {
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
@ -358,7 +358,7 @@ struct MDForFunctor
|
||||
}}
|
||||
} else {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
@ -396,7 +396,7 @@ struct MDForFunctor
|
||||
|
||||
if ( MDRange::inner_direction == MDRange::Right ) {
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
@ -404,7 +404,7 @@ struct MDForFunctor
|
||||
}}
|
||||
} else {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
@ -501,7 +501,7 @@ struct MDForFunctor
|
||||
if ( MDRange::inner_direction == MDRange::Right ) {
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i2=b2; i2<e2; ++i2) {
|
||||
@ -510,7 +510,7 @@ struct MDForFunctor
|
||||
} else {
|
||||
for (int i2=b2; i2<e2; ++i2) {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
@ -555,7 +555,7 @@ struct MDForFunctor
|
||||
if ( MDRange::inner_direction == MDRange::Right ) {
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i2=b2; i2<e2; ++i2) {
|
||||
@ -564,7 +564,7 @@ struct MDForFunctor
|
||||
} else {
|
||||
for (int i2=b2; i2<e2; ++i2) {
|
||||
for (int i1=b1; i1<e1; ++i1) {
|
||||
#if defined(KOKKOS_MDRANGE_IVDEP)
|
||||
#if defined(KOKKOS_IMPL_MDRANGE_IVDEP)
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int i0=b0; i0<e0; ++i0) {
|
||||
|
||||
@ -41,8 +41,8 @@
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_ARRAY
|
||||
#define KOKKOS_ARRAY
|
||||
#ifndef KOKKOS_ARRAY_HPP
|
||||
#define KOKKOS_ARRAY_HPP
|
||||
|
||||
#include <type_traits>
|
||||
#include <algorithm>
|
||||
@ -298,5 +298,5 @@ public:
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* #ifndef KOKKOS_ARRAY */
|
||||
#endif /* #ifndef KOKKOS_ARRAY_HPP */
|
||||
|
||||
|
||||
@ -73,18 +73,18 @@
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
#if defined(_WIN32)
|
||||
#define KOKKOS_ATOMICS_USE_WINDOWS
|
||||
#define KOKKOS_ENABLE_WINDOWS_ATOMICS
|
||||
#else
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
// Compiling NVIDIA device code, must use Cuda atomics:
|
||||
|
||||
#define KOKKOS_ATOMICS_USE_CUDA
|
||||
#define KOKKOS_ENABLE_CUDA_ATOMICS
|
||||
#endif
|
||||
|
||||
#if ! defined( KOKKOS_ATOMICS_USE_GCC ) && \
|
||||
! defined( KOKKOS_ATOMICS_USE_INTEL ) && \
|
||||
! defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
#if ! defined( KOKKOS_ENABLE_GNU_ATOMICS ) && \
|
||||
! defined( KOKKOS_ENABLE_INTEL_ATOMICS ) && \
|
||||
! defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
|
||||
// Compiling for non-Cuda atomic implementation has not been pre-selected.
|
||||
// Choose the best implementation for the detected compiler.
|
||||
@ -94,16 +94,16 @@
|
||||
defined( KOKKOS_COMPILER_CLANG ) || \
|
||||
( defined ( KOKKOS_COMPILER_NVCC ) )
|
||||
|
||||
#define KOKKOS_ATOMICS_USE_GCC
|
||||
#define KOKKOS_ENABLE_GNU_ATOMICS
|
||||
|
||||
#elif defined( KOKKOS_COMPILER_INTEL ) || \
|
||||
defined( KOKKOS_COMPILER_CRAYC )
|
||||
|
||||
#define KOKKOS_ATOMICS_USE_INTEL
|
||||
#define KOKKOS_ENABLE_INTEL_ATOMICS
|
||||
|
||||
#elif defined( _OPENMP ) && ( 201107 <= _OPENMP )
|
||||
|
||||
#define KOKKOS_ATOMICS_USE_OMP31
|
||||
#define KOKKOS_ENABLE_OPENMP_ATOMICS
|
||||
|
||||
#else
|
||||
|
||||
@ -119,7 +119,7 @@
|
||||
// Forward decalaration of functions supporting arbitrary sized atomics
|
||||
// This is necessary since Kokkos_Atomic.hpp is internally included very early
|
||||
// through Kokkos_HostSpace.hpp as well as the allocation tracker.
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
/// \brief Aquire a lock for the address
|
||||
@ -127,7 +127,7 @@ namespace Impl {
|
||||
/// This function tries to aquire the lock for the hash value derived
|
||||
/// from the provided ptr. If the lock is successfully aquired the
|
||||
/// function returns true. Otherwise it returns false.
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
extern
|
||||
#endif
|
||||
__device__ inline
|
||||
@ -139,7 +139,7 @@ bool lock_address_cuda_space(void* ptr);
|
||||
/// from the provided ptr. This function should only be called
|
||||
/// after previously successfully aquiring a lock with
|
||||
/// lock_address.
|
||||
#ifdef KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE
|
||||
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
|
||||
extern
|
||||
#endif
|
||||
__device__ inline
|
||||
@ -170,16 +170,16 @@ namespace Kokkos {
|
||||
inline
|
||||
const char * atomic_query_version()
|
||||
{
|
||||
#if defined( KOKKOS_ATOMICS_USE_CUDA )
|
||||
return "KOKKOS_ATOMICS_USE_CUDA" ;
|
||||
#elif defined( KOKKOS_ATOMICS_USE_GCC )
|
||||
return "KOKKOS_ATOMICS_USE_GCC" ;
|
||||
#elif defined( KOKKOS_ATOMICS_USE_INTEL )
|
||||
return "KOKKOS_ATOMICS_USE_INTEL" ;
|
||||
#elif defined( KOKKOS_ATOMICS_USE_OMP31 )
|
||||
return "KOKKOS_ATOMICS_USE_OMP31" ;
|
||||
#elif defined( KOKKOS_ATOMICS_USE_WINDOWS )
|
||||
return "KOKKOS_ATOMICS_USE_WINDOWS";
|
||||
#if defined( KOKKOS_ENABLE_CUDA_ATOMICS )
|
||||
return "KOKKOS_ENABLE_CUDA_ATOMICS" ;
|
||||
#elif defined( KOKKOS_ENABLE_GNU_ATOMICS )
|
||||
return "KOKKOS_ENABLE_GNU_ATOMICS" ;
|
||||
#elif defined( KOKKOS_ENABLE_INTEL_ATOMICS )
|
||||
return "KOKKOS_ENABLE_INTEL_ATOMICS" ;
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS )
|
||||
return "KOKKOS_ENABLE_OPENMP_ATOMICS" ;
|
||||
#elif defined( KOKKOS_ENABLE_WINDOWS_ATOMICS )
|
||||
return "KOKKOS_ENABLE_WINDOWS_ATOMICS";
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -185,15 +185,15 @@ public:
|
||||
|
||||
typedef typename std::conditional
|
||||
< std::is_same< memory_space , Kokkos::HostSpace >::value
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|| std::is_same< memory_space , Kokkos::CudaUVMSpace >::value
|
||||
|| std::is_same< memory_space , Kokkos::CudaHostPinnedSpace >::value
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
, memory_space
|
||||
, Kokkos::HostSpace
|
||||
>::type host_memory_space ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
typedef typename std::conditional
|
||||
< std::is_same< execution_space , Kokkos::Cuda >::value
|
||||
, Kokkos::DefaultHostExecutionSpace , execution_space
|
||||
|
||||
@ -49,19 +49,19 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
#include <Kokkos_Serial.hpp>
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
#include <Kokkos_OpenMP.hpp>
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
#include <Kokkos_Threads.hpp>
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#include <Kokkos_Cuda.hpp>
|
||||
#endif
|
||||
|
||||
@ -74,9 +74,7 @@
|
||||
#include <Kokkos_hwloc.hpp>
|
||||
#include <Kokkos_Timer.hpp>
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
#include <Kokkos_Complex.hpp>
|
||||
#endif
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -83,25 +83,25 @@ namespace Kokkos {
|
||||
|
||||
class HostSpace ; ///< Memory space for main process and CPU execution spaces
|
||||
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
namespace Experimental {
|
||||
class HBWSpace ; /// Memory space for hbw_malloc from memkind (e.g. for KNL processor)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
class Serial ; ///< Execution space main process on CPU
|
||||
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||
#endif // defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
class Threads ; ///< Execution space with pthreads back-end
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP )
|
||||
class OpenMP ; ///< OpenMP execution space
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
class CudaSpace ; ///< Memory space on Cuda GPU
|
||||
class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM
|
||||
class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU
|
||||
@ -122,29 +122,29 @@ struct Device;
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||
typedef Cuda DefaultExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
typedef OpenMP DefaultExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
typedef Threads DefaultExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
typedef Serial DefaultExecutionSpace ;
|
||||
#else
|
||||
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
|
||||
#endif
|
||||
|
||||
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
typedef OpenMP DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
typedef Threads DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
typedef Serial DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_OPENMP )
|
||||
#elif defined ( KOKKOS_ENABLE_OPENMP )
|
||||
typedef OpenMP DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_PTHREAD )
|
||||
#elif defined ( KOKKOS_ENABLE_PTHREAD )
|
||||
typedef Threads DefaultHostExecutionSpace ;
|
||||
#elif defined ( KOKKOS_HAVE_SERIAL )
|
||||
#elif defined ( KOKKOS_ENABLE_SERIAL )
|
||||
typedef Serial DefaultHostExecutionSpace ;
|
||||
#else
|
||||
# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads."
|
||||
@ -161,7 +161,7 @@ namespace Kokkos {
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_HAVE_CUDA)
|
||||
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_ENABLE_CUDA)
|
||||
typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ;
|
||||
#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
|
||||
typedef Kokkos::HostSpace ActiveExecutionMemorySpace ;
|
||||
|
||||
@ -48,7 +48,7 @@
|
||||
|
||||
// If CUDA execution space is enabled then use this header file.
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <iosfwd>
|
||||
#include <vector>
|
||||
@ -94,7 +94,7 @@ public:
|
||||
//! Tag this class as a kokkos execution space
|
||||
typedef Cuda execution_space ;
|
||||
|
||||
#if defined( KOKKOS_USE_CUDA_UVM )
|
||||
#if defined( KOKKOS_ENABLE_CUDA_UVM )
|
||||
//! This execution space's preferred memory space.
|
||||
typedef CudaUVMSpace memory_space ;
|
||||
#else
|
||||
@ -240,7 +240,7 @@ struct MemorySpaceAccess
|
||||
enum { deepcopy = false };
|
||||
};
|
||||
|
||||
#if defined( KOKKOS_USE_CUDA_UVM )
|
||||
#if defined( KOKKOS_ENABLE_CUDA_UVM )
|
||||
|
||||
// If forcing use of UVM everywhere
|
||||
// then must assume that CudaUVMSpace
|
||||
@ -297,7 +297,7 @@ struct VerifyExecutionCanAccessMemorySpace
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #ifndef KOKKOS_CUDA_HPP */
|
||||
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
|
||||
#include <iosfwd>
|
||||
#include <typeinfo>
|
||||
@ -939,6 +939,6 @@ public:
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
|
||||
#endif /* #define KOKKOS_CUDASPACE_HPP */
|
||||
|
||||
|
||||
@ -48,7 +48,7 @@
|
||||
#include <Kokkos_HostSpace.hpp>
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
@ -102,15 +102,15 @@ public:
|
||||
/// Every memory space has a default execution space. This is
|
||||
/// useful for things like initializing a View (which happens in
|
||||
/// parallel using the View's default execution space).
|
||||
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
typedef Kokkos::OpenMP execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
typedef Kokkos::Threads execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_OPENMP )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP )
|
||||
typedef Kokkos::OpenMP execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_PTHREAD )
|
||||
#elif defined( KOKKOS_ENABLE_PTHREAD )
|
||||
typedef Kokkos::Threads execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_SERIAL )
|
||||
#elif defined( KOKKOS_ENABLE_SERIAL )
|
||||
typedef Kokkos::Serial execution_space ;
|
||||
#else
|
||||
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
|
||||
|
||||
@ -108,15 +108,15 @@ public:
|
||||
/// Every memory space has a default execution space. This is
|
||||
/// useful for things like initializing a View (which happens in
|
||||
/// parallel using the View's default execution space).
|
||||
#if defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
typedef Kokkos::OpenMP execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
typedef Kokkos::Threads execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_OPENMP )
|
||||
#elif defined( KOKKOS_ENABLE_OPENMP )
|
||||
typedef Kokkos::OpenMP execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_PTHREAD )
|
||||
#elif defined( KOKKOS_ENABLE_PTHREAD )
|
||||
typedef Kokkos::Threads execution_space ;
|
||||
#elif defined( KOKKOS_HAVE_SERIAL )
|
||||
#elif defined( KOKKOS_ENABLE_SERIAL )
|
||||
typedef Kokkos::Serial execution_space ;
|
||||
#else
|
||||
# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices."
|
||||
|
||||
@ -47,23 +47,24 @@
|
||||
//----------------------------------------------------------------------------
|
||||
/** Pick up configure/build options via #define macros:
|
||||
*
|
||||
* KOKKOS_HAVE_CUDA Kokkos::Cuda execution and memory spaces
|
||||
* KOKKOS_HAVE_PTHREAD Kokkos::Threads execution space
|
||||
* KOKKOS_HAVE_QTHREAD Kokkos::Qthread execution space
|
||||
* KOKKOS_HAVE_OPENMP Kokkos::OpenMP execution space
|
||||
* KOKKOS_HAVE_HWLOC HWLOC library is available
|
||||
* KOKKOS_ENABLE_CUDA Kokkos::Cuda execution and memory spaces
|
||||
* KOKKOS_ENABLE_PTHREAD Kokkos::Threads execution space
|
||||
* KOKKOS_ENABLE_QTHREAD Kokkos::Qthread execution space
|
||||
* KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space
|
||||
* KOKKOS_ENABLE_HWLOC HWLOC library is available
|
||||
* KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK insert array bounds checks, is expensive!
|
||||
* KOKKOS_HAVE_CXX11 enable C++11 features
|
||||
*
|
||||
* KOKKOS_HAVE_MPI negotiate MPI/execution space interactions
|
||||
* KOKKOS_ENABLE_MPI negotiate MPI/execution space interactions
|
||||
*
|
||||
* KOKKOS_USE_CUDA_UVM Use CUDA UVM for Cuda memory space
|
||||
* KOKKOS_ENABLE_CUDA_UVM Use CUDA UVM for Cuda memory space
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
|
||||
#include <KokkosCore_config.h>
|
||||
#endif
|
||||
|
||||
#include <impl/Kokkos_OldMacros.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** Pick up compiler specific #define macros:
|
||||
*
|
||||
@ -80,10 +81,10 @@
|
||||
*
|
||||
* Macros for which compiler extension to use for atomics on intrinsic types
|
||||
*
|
||||
* KOKKOS_ATOMICS_USE_CUDA
|
||||
* KOKKOS_ATOMICS_USE_GNU
|
||||
* KOKKOS_ATOMICS_USE_INTEL
|
||||
* KOKKOS_ATOMICS_USE_OPENMP31
|
||||
* KOKKOS_ENABLE_CUDA_ATOMICS
|
||||
* KOKKOS_ENABLE_GNU_ATOMICS
|
||||
* KOKKOS_ENABLE_INTEL_ATOMICS
|
||||
* KOKKOS_ENABLE_OPENMP_ATOMICS
|
||||
*
|
||||
* A suite of 'KOKKOS_HAVE_PRAGMA_...' are defined for internal use.
|
||||
*
|
||||
@ -96,7 +97,7 @@
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ )
|
||||
#if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
|
||||
|
||||
/* Compiling with a CUDA compiler.
|
||||
*
|
||||
@ -126,7 +127,7 @@
|
||||
#error "Cuda device capability >= 3.0 is required"
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_CUDA_USE_LAMBDA
|
||||
#ifdef KOKKOS_ENABLE_CUDA_LAMBDA
|
||||
#if ( CUDA_VERSION < 7050 )
|
||||
// CUDA supports C++11 lambdas generated in host code to be given
|
||||
// to the device starting with version 7.5. But the release candidate (7.5.6)
|
||||
@ -137,18 +138,18 @@
|
||||
#define KOKKOS_LAMBDA [=]__device__
|
||||
#else
|
||||
#define KOKKOS_LAMBDA [=]__host__ __device__
|
||||
#if defined( KOKKOS_HAVE_CXX1Z )
|
||||
#if defined( KOKKOS_ENABLE_CXX1Z )
|
||||
#define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__
|
||||
#endif
|
||||
#endif
|
||||
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
|
||||
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
|
||||
#endif
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) && defined( __CUDACC__ ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) */
|
||||
|
||||
|
||||
#if defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
|
||||
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
|
||||
// Cuda version 8.0 still needs the functor wrapper
|
||||
#if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
|
||||
#if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__)
|
||||
#define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
|
||||
#endif
|
||||
#endif
|
||||
@ -156,7 +157,7 @@
|
||||
/*--------------------------------------------------------------------------*/
|
||||
/* Language info: C++, CUDA, OPENMP */
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
// Compiling Cuda code to 'ptx'
|
||||
|
||||
#define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
|
||||
@ -185,21 +186,21 @@
|
||||
#define KOKKOS_COMPILER_NVCC __NVCC__
|
||||
|
||||
#else
|
||||
#if defined( KOKKOS_HAVE_CXX11 ) && ! defined( KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA )
|
||||
#if !defined (KOKKOS_HAVE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
|
||||
#if ! defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
|
||||
#if !defined (KOKKOS_ENABLE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either
|
||||
// CUDA (including version 6.5) does not support giving lambdas as
|
||||
// arguments to global functions. Thus it's not currently possible
|
||||
// to dispatch lambdas from the host.
|
||||
#define KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
|
||||
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
|
||||
#endif
|
||||
#endif
|
||||
#endif /* #if defined( __NVCC__ ) */
|
||||
|
||||
#if defined( KOKKOS_HAVE_CXX11 ) && !defined (KOKKOS_LAMBDA)
|
||||
#if !defined (KOKKOS_LAMBDA)
|
||||
#define KOKKOS_LAMBDA [=]
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
|
||||
#if defined( KOKKOS_ENABLE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA)
|
||||
#define KOKKOS_CLASS_LAMBDA [=,*this]
|
||||
#endif
|
||||
|
||||
@ -259,11 +260,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_INTEL )
|
||||
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#define KOKKOS_RESTRICT __restrict__
|
||||
|
||||
@ -317,11 +318,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_IBM )
|
||||
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#endif
|
||||
|
||||
@ -330,11 +331,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_CLANG )
|
||||
|
||||
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
|
||||
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
|
||||
@ -347,11 +348,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_GNU )
|
||||
|
||||
//#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#if ! defined( KOKKOS_FORCEINLINE_FUNCTION )
|
||||
#define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline))
|
||||
@ -371,11 +372,11 @@
|
||||
|
||||
#if defined( KOKKOS_COMPILER_PGI )
|
||||
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_HAVE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_HAVE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_HAVE_PRAGMA_SIMD 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
|
||||
#endif
|
||||
|
||||
@ -384,7 +385,7 @@
|
||||
#if defined( KOKKOS_COMPILER_NVCC )
|
||||
|
||||
#if defined(__CUDA_ARCH__ )
|
||||
#define KOKKOS_HAVE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@ -426,19 +427,15 @@
|
||||
#define KOKKOS_ALIGN_PTR(size) __attribute__((aligned(size)))
|
||||
#endif
|
||||
|
||||
#if ! defined(KOKKOS_ALIGN_16)
|
||||
#define KOKKOS_ALIGN_16 KOKKOS_ALIGN(16)
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** Determine the default execution space for parallel dispatch.
|
||||
* There is zero or one default execution space specified.
|
||||
*/
|
||||
|
||||
#if 1 < ( ( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
|
||||
#if 1 < ( ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
|
||||
( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) )
|
||||
|
||||
#error "More than one KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* specified" ;
|
||||
|
||||
@ -447,24 +444,24 @@
|
||||
/** If default is not specified then choose from enabled execution spaces.
|
||||
* Priority: CUDA, OPENMP, THREADS, SERIAL
|
||||
*/
|
||||
#if defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined ( KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
#elif defined ( KOKKOS_HAVE_CUDA )
|
||||
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#elif defined ( KOKKOS_HAVE_OPENMP )
|
||||
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#elif defined ( KOKKOS_HAVE_PTHREAD )
|
||||
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
|
||||
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
|
||||
#elif defined ( KOKKOS_ENABLE_CUDA )
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
|
||||
#elif defined ( KOKKOS_ENABLE_OPENMP )
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP
|
||||
#elif defined ( KOKKOS_ENABLE_PTHREAD )
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS
|
||||
#else
|
||||
#define KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** Determine for what space the code is being compiled: */
|
||||
|
||||
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_HAVE_CUDA)
|
||||
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_ENABLE_CUDA)
|
||||
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
|
||||
#else
|
||||
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
@ -476,7 +473,7 @@
|
||||
#if ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \
|
||||
( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 )
|
||||
#if defined(KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN)
|
||||
#define KOKKOS_POSIX_MEMALIGN_AVAILABLE 1
|
||||
#define KOKKOS_ENABLE_POSIX_MEMALIGN 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -489,15 +486,6 @@
|
||||
#define KOKKOS_ENABLE_PROFILING 1
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
/* Transitional macro to change between old and new View
|
||||
* are no longer supported.
|
||||
*/
|
||||
|
||||
#define KOKKOS_USING_EXP_VIEW 1
|
||||
#define KOKKOS_USING_EXPERIMENTAL_VIEW
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -57,18 +57,18 @@
|
||||
|
||||
// How should errors be handled? In general, production code should return a
|
||||
// value indicating failure so the user can decide how the error is handled.
|
||||
// While experimental, code can abort instead. If KOKKOS_MEMPOOL_PRINTERR is
|
||||
// While experimental, code can abort instead. If KOKKOS_ENABLE_MEMPOOL_PRINTERR is
|
||||
// defined, the code will abort with an error message. Otherwise, the code will
|
||||
// return with a value indicating failure when possible, or do nothing instead.
|
||||
//#define KOKKOS_MEMPOOL_PRINTERR
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
|
||||
//#define KOKKOS_MEMPOOL_PRINT_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
//#define KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
//#define KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -451,7 +451,7 @@ struct create_histogram {
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
template < typename UInt32View, typename SBHeaderView, typename MempoolBitset >
|
||||
struct count_allocated_blocks {
|
||||
typedef typename UInt32View::execution_space execution_space;
|
||||
@ -790,7 +790,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO
|
||||
printf( "\n" );
|
||||
printf( " m_lg_sb_size: %12lu\n", m_lg_sb_size );
|
||||
printf( " m_sb_size: %12lu\n", m_sb_size );
|
||||
@ -810,7 +810,7 @@ public:
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
// Print the blocksize info for all the block sizes.
|
||||
printf( "SIZE BLOCKS_PER_SB PAGES_PER_SB SB_FULL_LEVEL PAGE_FULL_LEVEL\n" );
|
||||
for ( size_t i = 0; i < m_num_block_size; ++i ) {
|
||||
@ -845,7 +845,7 @@ public:
|
||||
uint32_t blocks_per_sb = m_blocksize_info[block_size_id].m_blocks_per_sb;
|
||||
uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb;
|
||||
|
||||
#ifdef KOKKOS_CUDA_CLANG_WORKAROUND
|
||||
#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
|
||||
// Without this test it looks like pages_per_sb might come back wrong.
|
||||
if ( pages_per_sb == 0 ) return NULL;
|
||||
#endif
|
||||
@ -966,7 +966,7 @@ public:
|
||||
|
||||
if ( new_sb_id == sb_id ) {
|
||||
allocation_done = true;
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
printf( "** No superblocks available. **\n" );
|
||||
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
fflush( stdout );
|
||||
@ -979,7 +979,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
else {
|
||||
printf( "** Requested allocation size (%zu) larger than superblock size (%lu). **\n",
|
||||
alloc_size, m_sb_size );
|
||||
@ -1068,7 +1068,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef KOKKOS_MEMPOOL_PRINTERR
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
else {
|
||||
printf( "\n** MemoryPool::deallocate() ADDRESS_OUT_OF_RANGE(0x%llx) **\n",
|
||||
reinterpret_cast<uint64_t>( alloc_ptr ) );
|
||||
@ -1109,7 +1109,7 @@ public:
|
||||
{
|
||||
printf( "\n" );
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
typename SBHeaderView::HostMirror host_sb_header = create_mirror_view( m_sb_header );
|
||||
deep_copy( host_sb_header, m_sb_header );
|
||||
|
||||
@ -1188,7 +1188,7 @@ public:
|
||||
num_active_sb += host_active(i) != INVALID_SUPERBLOCK;
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS
|
||||
// Print active superblocks.
|
||||
printf( "BS_ID SB_ID\n" );
|
||||
for ( size_t i = 0; i < m_num_block_size; ++i ) {
|
||||
@ -1208,7 +1208,7 @@ public:
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
// Print the summary page histogram.
|
||||
printf( "USED_BLOCKS PAGE_COUNT\n" );
|
||||
for ( uint32_t i = 0; i < 33; ++i ) {
|
||||
@ -1217,7 +1217,7 @@ public:
|
||||
printf( "\n" );
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
// Print the page histogram for a few individual superblocks.
|
||||
// const uint32_t num_sb_id = 2;
|
||||
// uint32_t sb_id[num_sb_id] = { 0, 10 };
|
||||
@ -1484,7 +1484,7 @@ private:
|
||||
// 1. An invalid superblock should never be found here.
|
||||
// 2. If the new superblock is the same as the previous superblock, the
|
||||
// allocator is empty.
|
||||
#ifdef KOKKOS_MEMPOOL_PRINTERR
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
if ( new_sb == INVALID_SUPERBLOCK ) {
|
||||
printf( "\n** MemoryPool::find_superblock() FOUND_INACTIVE_SUPERBLOCK **\n" );
|
||||
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
@ -1531,28 +1531,28 @@ private:
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINTERR
|
||||
#undef KOKKOS_MEMPOOL_PRINTERR
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINTERR
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_PAGE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#undef KOKKOS_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO
|
||||
#endif
|
||||
|
||||
#endif // KOKKOS_MEMORYPOOL_HPP
|
||||
|
||||
@ -46,14 +46,18 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP) && !defined(_OPENMP)
|
||||
#error "You enabled Kokkos OpenMP support without enabling OpenMP in the compiler!"
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP )
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <iosfwd>
|
||||
#include <Kokkos_HostSpace.hpp>
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
#include <Kokkos_HBWSpace.hpp>
|
||||
#endif
|
||||
#include <Kokkos_ScratchSpace.hpp>
|
||||
@ -77,7 +81,7 @@ public:
|
||||
|
||||
//! Tag this class as a kokkos execution space
|
||||
typedef OpenMP execution_space ;
|
||||
#ifdef KOKKOS_HAVE_HBWSPACE
|
||||
#ifdef KOKKOS_ENABLE_HBWSPACE
|
||||
typedef Experimental::HBWSpace memory_space ;
|
||||
#else
|
||||
typedef HostSpace memory_space ;
|
||||
@ -194,7 +198,7 @@ struct VerifyExecutionCanAccessMemorySpace
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( _OPENMP ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP ) */
|
||||
#endif /* #ifndef KOKKOS_OPENMP_HPP */
|
||||
|
||||
|
||||
|
||||
@ -61,7 +61,7 @@
|
||||
#include <impl/Kokkos_Traits.hpp>
|
||||
#include <impl/Kokkos_FunctorAdapter.hpp>
|
||||
|
||||
#ifdef KOKKOS_HAVE_DEBUG
|
||||
#ifdef KOKKOS_DEBUG
|
||||
#include<iostream>
|
||||
#endif
|
||||
|
||||
|
||||
@ -978,7 +978,7 @@ struct ParallelReduceReturnValue<typename std::enable_if<Kokkos::is_view<ReturnT
|
||||
typedef InvalidType reducer_type;
|
||||
|
||||
typedef typename return_type::value_type value_type_scalar;
|
||||
typedef typename return_type::value_type value_type_array[];
|
||||
typedef typename return_type::value_type* const value_type_array;
|
||||
|
||||
typedef typename if_c<return_type::rank==0,value_type_scalar,value_type_array>::type value_type;
|
||||
|
||||
|
||||
@ -106,14 +106,14 @@ public:
|
||||
void* tmp = m_iter_L0 + m_offset * align (size);
|
||||
if (m_end_L0 < (m_iter_L0 += align (size) * m_multiplier)) {
|
||||
m_iter_L0 -= align (size) * m_multiplier; // put it back like it was
|
||||
#ifdef KOKKOS_HAVE_DEBUG
|
||||
#ifdef KOKKOS_DEBUG
|
||||
// mfh 23 Jun 2015: printf call consumes 25 registers
|
||||
// in a CUDA build, so only print in debug mode. The
|
||||
// function still returns NULL if not enough memory.
|
||||
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
|
||||
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
|
||||
long(m_end_L0-m_iter_L0));
|
||||
#endif // KOKKOS_HAVE_DEBUG
|
||||
#endif // KOKKOS_DEBUG
|
||||
tmp = 0;
|
||||
}
|
||||
return tmp;
|
||||
@ -121,14 +121,14 @@ public:
|
||||
void* tmp = m_iter_L1 + m_offset * align (size);
|
||||
if (m_end_L1 < (m_iter_L1 += align (size) * m_multiplier)) {
|
||||
m_iter_L1 -= align (size) * m_multiplier; // put it back like it was
|
||||
#ifdef KOKKOS_HAVE_DEBUG
|
||||
#ifdef KOKKOS_DEBUG
|
||||
// mfh 23 Jun 2015: printf call consumes 25 registers
|
||||
// in a CUDA build, so only print in debug mode. The
|
||||
// function still returns NULL if not enough memory.
|
||||
printf ("ScratchMemorySpace<...>::get_shmem: Failed to allocate "
|
||||
"%ld byte(s); remaining capacity is %ld byte(s)\n", long(size),
|
||||
long(m_end_L1-m_iter_L1));
|
||||
#endif // KOKKOS_HAVE_DEBUG
|
||||
#endif // KOKKOS_DEBUG
|
||||
tmp = 0;
|
||||
}
|
||||
return tmp;
|
||||
|
||||
@ -61,7 +61,7 @@
|
||||
|
||||
#include <KokkosExp_MDRangePolicy.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
#if defined( KOKKOS_ENABLE_SERIAL )
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
@ -1005,7 +1005,7 @@ template<typename iType, class Lambda>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
|
||||
loop_boundaries, const Lambda& lambda) {
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
|
||||
@ -1021,7 +1021,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
|
||||
loop_boundaries, const Lambda & lambda, ValueType& result) {
|
||||
result = ValueType();
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1044,7 +1044,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::S
|
||||
loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
|
||||
|
||||
ValueType result = init_result;
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1075,7 +1075,7 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Ser
|
||||
|
||||
value_type scan_val = value_type();
|
||||
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
|
||||
@ -1116,7 +1116,7 @@ void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const Func
|
||||
|
||||
#include <impl/Kokkos_Serial_Task.hpp>
|
||||
|
||||
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||
#endif // defined( KOKKOS_ENABLE_SERIAL )
|
||||
#endif /* #define KOKKOS_SERIAL_HPP */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
@ -52,9 +52,9 @@
|
||||
// and use relocateable device code to enable the task policy.
|
||||
// nvcc relocatable device code option: --relocatable-device-code=true
|
||||
|
||||
#if ( defined( KOKKOS_HAVE_CUDA ) )
|
||||
#if ( defined( KOKKOS_ENABLE_CUDA ) )
|
||||
#if ( 8000 <= CUDA_VERSION ) && \
|
||||
defined( KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE )
|
||||
defined( KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE )
|
||||
|
||||
#define KOKKOS_ENABLE_TASKDAG
|
||||
|
||||
@ -63,7 +63,6 @@
|
||||
#define KOKKOS_ENABLE_TASKDAG
|
||||
#endif
|
||||
|
||||
|
||||
#if defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -90,6 +89,34 @@ class TaskScheduler ;
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
/*\brief Implementation data for task data management, access, and execution.
|
||||
*
|
||||
* CRTP Inheritance structure to allow static_cast from the
|
||||
* task root type and a task's FunctorType.
|
||||
*
|
||||
* TaskBase< Space , ResultType , FunctorType >
|
||||
* : TaskBase< Space , ResultType , void >
|
||||
* , FunctorType
|
||||
* { ... };
|
||||
*
|
||||
* TaskBase< Space , ResultType , void >
|
||||
* : TaskBase< Space , void , void >
|
||||
* { ... };
|
||||
*/
|
||||
template< typename Space , typename ResultType , typename FunctorType >
|
||||
class TaskBase ;
|
||||
|
||||
template< typename Space >
|
||||
class TaskExec ;
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
/**
|
||||
@ -302,14 +329,6 @@ enum TaskPriority { TaskHighPriority = 0
|
||||
template< typename Space >
|
||||
void wait( TaskScheduler< Space > const & );
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
@ -363,20 +382,7 @@ private:
|
||||
, Future< A1 , A2 > const & arg
|
||||
, Options const & ... opts )
|
||||
{
|
||||
// Assign dependence to task->m_next
|
||||
// which will be processed within subsequent call to schedule.
|
||||
// Error if the dependence is reset.
|
||||
|
||||
if ( 0 != Kokkos::atomic_exchange(& task->m_next, arg.m_task) ) {
|
||||
Kokkos::abort("TaskScheduler ERROR: resetting task dependence");
|
||||
}
|
||||
|
||||
if ( 0 != arg.m_task ) {
|
||||
// The future may be destroyed upon returning from this call
|
||||
// so increment reference count to track this assignment.
|
||||
Kokkos::atomic_increment( &(arg.m_task->m_ref_count) );
|
||||
}
|
||||
|
||||
task->add_dependence( arg.m_task );
|
||||
assign( task , opts ... );
|
||||
}
|
||||
|
||||
@ -558,8 +564,7 @@ public:
|
||||
// Potentially spawning outside execution space so the
|
||||
// apply function pointer must be obtained from execution space.
|
||||
// Required for Cuda execution space function pointer.
|
||||
queue_type::specialization::template
|
||||
proc_set_apply< FunctorType >( & f.m_task->m_apply );
|
||||
m_queue->template proc_set_apply< FunctorType >( & f.m_task->m_apply );
|
||||
|
||||
m_queue->schedule( f.m_task );
|
||||
}
|
||||
@ -638,25 +643,13 @@ public:
|
||||
, value_type
|
||||
, FunctorType > ;
|
||||
|
||||
task_base * const zero = (task_base *) 0 ;
|
||||
task_base * const lock = (task_base *) task_base::LockTag ;
|
||||
task_type * const task = static_cast< task_type * >( task_self );
|
||||
|
||||
// Precondition:
|
||||
// task is in Executing state
|
||||
// therefore m_next == LockTag
|
||||
//
|
||||
// Change to m_next == 0 for no dependence
|
||||
|
||||
if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) {
|
||||
Kokkos::abort("TaskScheduler::respawn ERROR: already respawned");
|
||||
}
|
||||
// Reschedule task with no dependences.
|
||||
m_queue->reschedule( task );
|
||||
|
||||
// Dependences, if requested, are added here through parsing the arguments.
|
||||
assign( task , arg_options... );
|
||||
|
||||
// Postcondition:
|
||||
// task is in Executing-Respawn state
|
||||
// therefore m_next == dependece or 0
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
@ -697,4 +690,3 @@ void wait( TaskScheduler< ExecSpace > const & policy )
|
||||
|
||||
#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #ifndef KOKKOS_TASKSCHEDULER_HPP */
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
|
||||
#include <Kokkos_Core_fwd.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
#if defined( KOKKOS_ENABLE_PTHREAD )
|
||||
|
||||
#include <cstddef>
|
||||
#include <iosfwd>
|
||||
@ -227,7 +227,7 @@ struct VerifyExecutionCanAccessMemorySpace
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */
|
||||
#endif /* #define KOKKOS_THREADS_HPP */
|
||||
|
||||
|
||||
|
||||
@ -47,10 +47,10 @@
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#undef KOKKOS_USE_LIBRT
|
||||
#undef KOKKOS_ENABLE_LIBRT
|
||||
#include <gettimeofday.c>
|
||||
#else
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifdef KOKKOS_ENABLE_LIBRT
|
||||
#include <ctime>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
@ -63,7 +63,7 @@ namespace Kokkos {
|
||||
|
||||
class Timer {
|
||||
private:
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifdef KOKKOS_ENABLE_LIBRT
|
||||
struct timespec m_old;
|
||||
#else
|
||||
struct timeval m_old ;
|
||||
@ -74,7 +74,7 @@ public:
|
||||
|
||||
inline
|
||||
void reset() {
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifdef KOKKOS_ENABLE_LIBRT
|
||||
clock_gettime(CLOCK_REALTIME, &m_old);
|
||||
#else
|
||||
gettimeofday( & m_old , ((struct timezone *) NULL ) );
|
||||
@ -90,7 +90,7 @@ public:
|
||||
inline
|
||||
double seconds() const
|
||||
{
|
||||
#ifdef KOKKOS_USE_LIBRT
|
||||
#ifdef KOKKOS_ENABLE_LIBRT
|
||||
struct timespec m_new;
|
||||
clock_gettime(CLOCK_REALTIME, &m_new);
|
||||
|
||||
|
||||
@ -46,7 +46,7 @@
|
||||
#ifndef KOKKOS_VECTORIZATION_HPP
|
||||
#define KOKKOS_VECTORIZATION_HPP
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
#include <Cuda/Kokkos_Cuda_Vectorization.hpp>
|
||||
#endif
|
||||
|
||||
|
||||
@ -623,13 +623,13 @@ private:
|
||||
|
||||
#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
|
||||
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
|
||||
Kokkos::Impl::view_verify_operator_bounds ARG ;
|
||||
|
||||
#else
|
||||
|
||||
#define KOKKOS_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check();
|
||||
|
||||
#endif
|
||||
@ -647,9 +647,9 @@ public:
|
||||
operator()( Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference();
|
||||
@ -670,9 +670,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0);
|
||||
@ -692,9 +692,9 @@ public:
|
||||
{
|
||||
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 ];
|
||||
@ -713,9 +713,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
|
||||
@ -734,9 +734,9 @@ public:
|
||||
operator[]( const I0 & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0);
|
||||
@ -753,9 +753,9 @@ public:
|
||||
operator[]( const I0 & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 ];
|
||||
@ -772,9 +772,9 @@ public:
|
||||
operator[]( const I0 & i0 ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ];
|
||||
@ -795,9 +795,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1);
|
||||
@ -816,9 +816,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ];
|
||||
@ -837,9 +837,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ];
|
||||
@ -858,9 +858,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ];
|
||||
@ -879,9 +879,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ];
|
||||
@ -900,9 +900,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 +
|
||||
@ -924,9 +924,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ];
|
||||
@ -944,9 +944,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2);
|
||||
@ -967,9 +967,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ];
|
||||
@ -987,9 +987,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3);
|
||||
@ -1012,9 +1012,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ];
|
||||
@ -1034,9 +1034,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3,i4);
|
||||
@ -1059,9 +1059,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ];
|
||||
@ -1081,9 +1081,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5);
|
||||
@ -1106,9 +1106,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ];
|
||||
@ -1128,9 +1128,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
|
||||
@ -1153,9 +1153,9 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ];
|
||||
@ -1175,15 +1175,15 @@ public:
|
||||
, Args ... args ) const
|
||||
{
|
||||
#ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
#else
|
||||
KOKKOS_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) )
|
||||
#endif
|
||||
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7);
|
||||
}
|
||||
|
||||
#undef KOKKOS_VIEW_OPERATOR_VERIFY
|
||||
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
|
||||
|
||||
//----------------------------------------
|
||||
// Standard destructor, constructors, and assignment operators
|
||||
@ -1322,7 +1322,7 @@ public:
|
||||
alloc_prop prop( arg_prop );
|
||||
|
||||
//------------------------------------------------------------
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
// If allocating in CudaUVMSpace must fence before and after
|
||||
// the allocation to protect against possible concurrent access
|
||||
// on the CPU and the GPU.
|
||||
@ -1338,7 +1338,7 @@ public:
|
||||
record = m_map.allocate_shared( prop , arg_layout );
|
||||
|
||||
//------------------------------------------------------------
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
#if defined( KOKKOS_ENABLE_CUDA )
|
||||
if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) {
|
||||
traits::device_type::memory_space::execution_space::fence();
|
||||
}
|
||||
|
||||
@ -79,7 +79,7 @@ private:
|
||||
, const Member ibeg , const Member iend )
|
||||
{
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -96,7 +96,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -218,7 +218,7 @@ private:
|
||||
, reference_type update )
|
||||
{
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -236,7 +236,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -417,7 +417,7 @@ private:
|
||||
, reference_type update , const bool final )
|
||||
{
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
@ -435,7 +435,7 @@ private:
|
||||
{
|
||||
const TagType t{} ;
|
||||
#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
|
||||
#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
|
||||
#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP
|
||||
#pragma ivdep
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -43,7 +43,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
#if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG )
|
||||
|
||||
#include <impl/Kokkos_TaskQueue_impl.hpp>
|
||||
|
||||
@ -324,6 +324,6 @@ void TaskQueueSpecialization< Kokkos::OpenMP >::
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
#endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user