Merge branch 'develop' into kokkos_unittests

This commit is contained in:
Richard Berger
2021-12-20 10:54:27 -05:00
697 changed files with 32785 additions and 15181 deletions

View File

@ -39,8 +39,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.4.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "4c84698917c93a18985b311bb6caf84f" CACHE STRING "MD5 checksum of KOKKOS tarball")
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.5.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "079323d973ae0e1c38c0a54a150c674e" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5)
ExternalProject_Add(kokkos_build
@ -60,7 +60,7 @@ if(DOWNLOAD_KOKKOS)
target_link_libraries(lmp PRIVATE LAMMPS::KOKKOS)
add_dependencies(LAMMPS::KOKKOS kokkos_build)
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 3.4.01 REQUIRED CONFIG)
find_package(Kokkos 3.5.00 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
target_link_libraries(lmp PRIVATE Kokkos::kokkos)
else()

View File

@ -54,8 +54,8 @@ if(DOWNLOAD_PLUMED)
set(PLUMED_BUILD_BYPRODUCTS "<INSTALL_DIR>/lib/libplumedWrapper.a")
endif()
set(PLUMED_URL "https://github.com/plumed/plumed2/releases/download/v2.7.2/plumed-src-2.7.2.tgz" CACHE STRING "URL for PLUMED tarball")
set(PLUMED_MD5 "cfa0b4dd90a81c25d3302e8d97bfeaea" CACHE STRING "MD5 checksum of PLUMED tarball")
set(PLUMED_URL "https://github.com/plumed/plumed2/releases/download/v2.7.3/plumed-src-2.7.3.tgz" CACHE STRING "URL for PLUMED tarball")
set(PLUMED_MD5 "f00cc82edfefe6bb3df934911dbe32fb" CACHE STRING "MD5 checksum of PLUMED tarball")
mark_as_advanced(PLUMED_URL)
mark_as_advanced(PLUMED_MD5)

View File

@ -1,4 +1,4 @@
.TH LAMMPS "1" "27 October 2021" "2021-10-27"
.TH LAMMPS "1" "14 December 2021" "2021-12-14"
.SH NAME
.B LAMMPS
\- Molecular Dynamics Simulator.

View File

@ -185,6 +185,10 @@ The ``ctest`` command has many options, the most important ones are:
- run subset of tests matching the regular expression <regex>
* - -E <regex>
- exclude subset of tests matching the regular expression <regex>
* - -L <regex>
- run subset of tests with a label matching the regular expression <regex>
* - -LE <regex>
- exclude subset of tests with a label matching the regular expression <regex>
* - -N
- dry-run: display list of tests without running them
* - -T memcheck
@ -299,6 +303,12 @@ will destroy the original file, if the generation run does not complete,
so using *-g* is recommended unless the YAML file is fully tested
and working.
Some of the force style tests are rather slow to run and some are very
sensitive to small differences such as CPU architecture, compiler
toolchain, or compiler optimization. Those tests are flagged with a "slow"
and/or "unstable" label, and thus those tests can be selectively
excluded with the ``-LE`` flag or selected with the ``-L`` flag.
.. admonition:: Recommendations and notes for YAML files
:class: note

View File

@ -341,6 +341,18 @@ minutes to hours) to build. Of course you only need to do that once.)
$ make lib-kim args="-p /usr/local" # use an existing KIM API installation at the provided location
$ make lib-kim args="-p /usr/local -a EAM_Dynamo_Ackland_W__MO_141627196590_002" # ditto but add one model or driver
When using the "-b " option, the KIM library is built using its native
cmake build system. The ``lib/kim/Install.py`` script supports a
``CMAKE`` environment variable if the cmake executable is named other
than ``cmake`` on your system. Additional environment variables may be
provided on the command line for use by cmake. For example, to use the
``cmake3`` executable and tell it to use the GNU version 11 compilers
to build KIM, one could use the following command line:
.. code-block:: bash
$ CMAKE=cmake3 CXX=g++-11 CC=gcc-11 FC=gfortran-11 make lib-kim args="-b " # (re-)install KIM API lib using cmake3 and gnu v11 compilers with only example models
Settings for debugging OpenKIM web queries discussed below need to
be applied by adding them to the ``LMP_INC`` variable through
editing the ``Makefile.machine`` you are using. For example:
@ -560,11 +572,26 @@ They must be specified in uppercase.
* - VEGA908
- GPU
- AMD GPU MI100 GFX908
* - INTEL_GEN
* - VEGA90A
- GPU
- Intel GPUs Gen9+
- AMD GPU
* - INTEL_DG1
- GPU
- Intel Iris XeMAX GPU
* - INTEL_GEN9
- GPU
- Intel GPU Gen9
* - INTEL_GEN11
- GPU
- Intel GPU Gen11
* - INTEL_GEN12LP
- GPU
- Intel GPU Gen12LP
* - INTEL_XEHP
- GPU
- Intel GPUs Xe-HP
This list was last updated for version 3.4.1 of the Kokkos library.
This list was last updated for version 3.5.0 of the Kokkos library.
.. tabs::

View File

@ -35,6 +35,7 @@ OPT.
* :doc:`class2 (ko) <bond_class2>`
* :doc:`fene (iko) <bond_fene>`
* :doc:`fene/expand (o) <bond_fene_expand>`
* :doc:`fene/nm <bond_fene>`
* :doc:`gaussian <bond_gaussian>`
* :doc:`gromos (o) <bond_gromos>`
* :doc:`harmonic (iko) <bond_harmonic>`

View File

@ -210,6 +210,7 @@ OPT.
* :doc:`nm/cut (o) <pair_nm>`
* :doc:`nm/cut/coul/cut (o) <pair_nm>`
* :doc:`nm/cut/coul/long (o) <pair_nm>`
* :doc:`nm/cut/split <pair_nm>`
* :doc:`oxdna/coaxstk <pair_oxdna>`
* :doc:`oxdna/excv <pair_oxdna>`
* :doc:`oxdna/hbond <pair_oxdna>`
@ -262,6 +263,7 @@ OPT.
* :doc:`spin/neel <pair_spin_neel>`
* :doc:`srp <pair_srp>`
* :doc:`sw (giko) <pair_sw>`
* :doc:`sw/mod (o) <pair_sw>`
* :doc:`table (gko) <pair_table>`
* :doc:`table/rx (k) <pair_table_rx>`
* :doc:`tdpd <pair_mesodpd>`

View File

@ -491,11 +491,6 @@ NPT ensemble using Nose-Hoover thermostat:
**(Schroeder)** Schroeder and Steinhauser, J Chem Phys, 133,
154511 (2010).
.. _Jiang2:
**(Jiang)** Jiang, Hardy, Phillips, MacKerell, Schulten, and Roux,
J Phys Chem Lett, 2, 87-92 (2011).
.. _Thole2:
**(Thole)** Chem Phys, 59, 341 (1981).

View File

@ -1,4 +1,5 @@
.. index:: bond_style fene
.. index:: bond_style fene/nm
.. index:: bond_style fene/intel
.. index:: bond_style fene/kk
.. index:: bond_style fene/omp
@ -8,12 +9,16 @@ bond_style fene command
Accelerator Variants: *fene/intel*, *fene/kk*, *fene/omp*
bond_style fene/nm command
==========================
Syntax
""""""
.. code-block:: LAMMPS
bond_style fene
bond_style fene/nm
Examples
""""""""
@ -23,6 +28,9 @@ Examples
bond_style fene
bond_coeff 1 30.0 1.5 1.0 1.0
bond_style fene/nm
bond_coeff 1 2.25344 1.5 1.0 1.12246 2 6
Description
"""""""""""
@ -38,16 +46,36 @@ term is attractive, the second Lennard-Jones term is repulsive. The
first term extends to :math:`R_0`, the maximum extent of the bond. The second
term is cutoff at :math:`2^\frac{1}{6} \sigma`, the minimum of the LJ potential.
The following coefficients must be defined for each bond type via the
:doc:`bond_coeff <bond_coeff>` command as in the example above, or in
the data file or restart files read by the :doc:`read_data <read_data>`
or :doc:`read_restart <read_restart>` commands:
The *fene/nm* bond style substitutes the standard LJ potential with the generalized LJ potential
in the same form as in pair style :doc:`nm/cut <pair_nm>`. The bond energy is then given by
.. math::
E = -0.5 K R_0^2 \ln \left[ 1 - \left(\frac{r}{R_0}\right)^2\right] + \frac{E_0}{(n-m)} \left[ m \left(\frac{r_0}{r}\right)^n - n \left(\frac{r_0}{r}\right)^m \right]
Similar to the *fene* style, the generalized Lennard-Jones term is cut off at
the potential minimum, :math:`r_0`, to be repulsive only. The following
coefficients must be defined for each bond type via the :doc:`bond_coeff
<bond_coeff>` command as in the example above, or in the data file or
restart files read by the :doc:`read_data <read_data>` or
:doc:`read_restart <read_restart>` commands:
* :math:`K` (energy/distance\^2)
* :math:`R_0` (distance)
* :math:`\epsilon` (energy)
* :math:`\sigma` (distance)
For the *fene/nm* style, the following coefficients are used. Please
note that the standard LJ potential, and thus the regular FENE potential,
is recovered for :math:`n = 12`, :math:`m = 6`, and :math:`r_0 = 2^\frac{1}{6} \sigma`.
* :math:`K` (energy/distance\^2)
* :math:`R_0` (distance)
* :math:`E_0` (energy)
* :math:`r_0` (distance)
* :math:`n` (unitless)
* :math:`m` (unitless)
----------
.. include:: accel_styles.rst
@ -57,9 +85,10 @@ or :doc:`read_restart <read_restart>` commands:
Restrictions
""""""""""""
This bond style can only be used if LAMMPS was built with the MOLECULE
package. See the :doc:`Build package <Build_package>` page for more
info.
The *fene* bond style can only be used if LAMMPS was built with the MOLECULE
package; the *fene/nm* bond style can only be used if LAMMPS was built
with the EXTRA-MOLECULE package. See the :doc:`Build package <Build_package>`
page for more info.
You typically should specify :doc:`special_bonds fene <special_bonds>`
or :doc:`special_bonds lj/coul 0 1 1 <special_bonds>` to use this bond
@ -68,7 +97,8 @@ style. LAMMPS will issue a warning it that's not the case.
Related commands
""""""""""""""""
:doc:`bond_coeff <bond_coeff>`, :doc:`delete_bonds <delete_bonds>`
:doc:`bond_coeff <bond_coeff>`, :doc:`delete_bonds <delete_bonds>`,
:doc:`pair style lj/cut <pair_lj>`, :doc:`pair style nm/cut <pair_nm>`.
Default
"""""""

View File

@ -87,6 +87,7 @@ accelerated styles exist.
* :doc:`class2 <bond_class2>` - COMPASS (class 2) bond
* :doc:`fene <bond_fene>` - FENE (finite-extensible non-linear elastic) bond
* :doc:`fene/expand <bond_fene_expand>` - FENE bonds with variable size particles
* :doc:`fene/nm <bond_fene>` - FENE bonds with a generalized Lennard-Jones potential
* :doc:`gaussian <bond_gaussian>` - multicentered Gaussian-based bond potential
* :doc:`gromos <bond_gromos>` - GROMOS force field bond
* :doc:`harmonic <bond_harmonic>` - harmonic bond

View File

@ -13,7 +13,7 @@ Syntax
* ID, group-ID are documented in :doc:`compute <compute>` command
* bond/local = style name of this compute command
* one or more values may be appended
* value = *dist* or *engpot* or *force* or *fx* or *fy* or *fz* or *engvib* or *engrot* or *engtrans* or *omega* or *velvib* or *v_name*
* value = *dist* or *dx* or *dy* or *dz* or *engpot* or *force* or *fx* or *fy* or *fz* or *engvib* or *engrot* or *engtrans* or *omega* or *velvib* or *v_name*
.. parsed-literal::
@ -21,6 +21,7 @@ Syntax
*engpot* = bond potential energy
*force* = bond force
*dx*,\ *dy*,\ *dz* = components of pairwise distance
*fx*,\ *fy*,\ *fz* = components of bond force
*engvib* = bond kinetic energy of vibration
*engrot* = bond kinetic energy of rotation
@ -63,6 +64,9 @@ whether the 2 atoms represent a simple diatomic molecule, or are part
of some larger molecule.
The value *dist* is the current length of the bond.
The values *dx*, *dy*, and *dz* are the xyz components of the
*distance* between the pair of atoms. These components are always
computed from the atom with the lower ID to the one with the higher ID.
The value *engpot* is the potential energy for the bond,
based on the current separation of the pair of atoms in the bond.

View File

@ -13,11 +13,12 @@ Syntax
* ID, group-ID are documented in :doc:`compute <compute>` command
* pair/local = style name of this compute command
* one or more values may be appended
* value = *dist* or *eng* or *force* or *fx* or *fy* or *fz* or *pN*
* value = *dist* or *dx* or *dy* or *dz* or *eng* or *force* or *fx* or *fy* or *fz* or *pN*
.. parsed-literal::
*dist* = pairwise distance
*dx*,\ *dy*,\ *dz* = components of pairwise distance
*eng* = pairwise energy
*force* = pairwise force
*fx*,\ *fy*,\ *fz* = components of pairwise force
@ -56,6 +57,9 @@ force cutoff distance for that interaction, as defined by the
commands.
The value *dist* is the distance between the pair of atoms.
The values *dx*, *dy*, and *dz* are the xyz components of the
*distance* between the pair of atoms. These components are always
computed from the atom with the lower ID to the one with the higher ID.
The value *eng* is the interaction energy for the pair of atoms.
@ -89,10 +93,10 @@ from the second of the two sub-styles. If the referenced *pN*
is not computed for the specific pairwise interaction (based on
atom types), then the output will be 0.0.
The value *dist* will be in distance :doc:`units <units>`. The value
*eng* will be in energy :doc:`units <units>`. The values *force*, *fx*,
*fy*, and *fz* will be in force :doc:`units <units>`. The values *pN*
will be in whatever units the pair style defines.
The values *dist*, *dx*, *dy*, and *dz* will be in distance :doc:`units <units>`.
The value *eng* will be in energy :doc:`units <units>`.
The values *force*, *fx*, *fy*, and *fz* will be in force :doc:`units <units>`.
The values *pN* will be in whatever units the pair style defines.
The optional *cutoff* keyword determines how the force cutoff distance
for an interaction is determined. For the default setting of *type*,

View File

@ -20,8 +20,10 @@ Syntax
cutoff = delete one atom from pairs of atoms within the cutoff (distance units)
group1-ID = one atom in pair must be in this group
group2-ID = other atom in pair must be in this group
*porosity* args = region-ID fraction seed
*porosity* args = group-ID region-ID fraction seed
group-ID = group within which to perform deletions
region-ID = region within which to perform deletions
or NULL to only impose the group criterion
fraction = delete this fraction of atoms
seed = random number seed (positive integer)
@ -43,7 +45,8 @@ Examples
delete_atoms region sphere compress no
delete_atoms overlap 0.3 all all
delete_atoms overlap 0.5 solvent colloid
delete_atoms porosity cube 0.1 482793 bond yes
delete_atoms porosity all cube 0.1 482793 bond yes
delete_atoms porosity polymer cube 0.1 482793 bond yes
Description
"""""""""""
@ -76,12 +79,17 @@ have occurred that no atom pairs within the cutoff will remain
minimum number of atoms will be deleted, or that the same atoms will
be deleted when running on different numbers of processors.
For style *porosity* a specified *fraction* of atoms are deleted
within the specified region. For example, if fraction is 0.1, then
10% of the atoms will be deleted. The atoms to delete are chosen
randomly. There is no guarantee that the exact fraction of atoms will
be deleted, or that the same atoms will be deleted when running on
different numbers of processors.
For style *porosity* a specified *fraction* of atoms are deleted which
are both in the specified group and within the specified region. The
region-ID can be specified as NULL to only impose the group criterion.
Likewise, specifying the group-ID as *all* will only impose the region
criterion.
For example, if fraction is 0.1, then 10% of the eligible atoms will
be deleted. The atoms to delete are chosen randomly. There is no
guarantee that the exact fraction of atoms will be deleted, or that
the same atoms will be deleted when running on different numbers of
processors.
If the *compress* keyword is set to *yes*, then after atoms are
deleted, then atom IDs are re-assigned so that they run from 1 to the
@ -89,8 +97,8 @@ number of atoms in the system. Note that this is not done for
molecular systems (see the :doc:`atom_style <atom_style>` command),
regardless of the *compress* setting, since it would foul up the bond
connectivity that has already been assigned. However, the
:doc:`reset_atom_ids <reset_atom_ids>` command can be used after this command to
accomplish the same thing.
:doc:`reset_atom_ids <reset_atom_ids>` command can be used after this
command to accomplish the same thing.
Note that the re-assignment of IDs is not really a compression, where
gaps in atom IDs are removed by decrementing atom IDs that are larger.
@ -100,15 +108,15 @@ the :doc:`create_atoms <create_atoms>` command explains.
A molecular system with fixed bonds, angles, dihedrals, or improper
interactions, is one where the topology of the interactions is
typically defined in the data file read by the
:doc:`read_data <read_data>` command, and where the interactions
themselves are defined with the :doc:`bond_style <bond_style>`,
:doc:`angle_style <angle_style>`, etc commands. If you delete atoms
from such a system, you must be careful not to end up with bonded
interactions that are stored by remaining atoms but which include
deleted atoms. This will cause LAMMPS to generate a "missing atoms"
error when the bonded interaction is computed. The *bond* and *mol*
keywords offer two ways to do that.
typically defined in the data file read by the :doc:`read_data
<read_data>` command, and where the interactions themselves are
defined with the :doc:`bond_style <bond_style>`, :doc:`angle_style
<angle_style>`, etc commands. If you delete atoms from such a system,
you must be careful not to end up with bonded interactions that are
stored by remaining atoms but which include deleted atoms. This will
cause LAMMPS to generate a "missing atoms" error when the bonded
interaction is computed. The *bond* and *mol* keywords offer two ways
to do that.
If the *bond* keyword is set to *yes* then any bond or angle or
dihedral or improper interaction that includes a deleted atom is also

View File

@ -708,8 +708,9 @@ are part of the MPIIO package. They are only enabled if LAMMPS was
built with that package. See the :doc:`Build package <Build_package>`
doc page for more info.
The *xtc* style is part of the MISC package. It is only enabled if
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` page for more info.
The *xtc* and *dcd* styles are part of the EXTRA-DUMP package. They
are only enabled if LAMMPS was built with that package. See the
:doc:`Build package <Build_package>` page for more info.
Related commands
""""""""""""""""

View File

@ -99,7 +99,7 @@ invoked by the :doc:`minimize <minimize>` command.
Restrictions
""""""""""""
This fix is part of the MISC package. It is only enabled if
This fix is part of the EXTRA-FIX package. It is only enabled if
LAMMPS was built with that package. See the :doc:`Build package
<Build_package>` page for more info.

View File

@ -40,7 +40,7 @@ Example input scripts available: examples/PACKAGES/drude
Description
"""""""""""
Apply two Langevin thermostats as described in :ref:`(Jiang) <Jiang1>` for
Apply two Langevin thermostats as described in :ref:`(Jiang1) <Jiang1>` for
thermalizing the reduced degrees of freedom of Drude oscillators.
This link describes how to use the :doc:`thermalized Drude oscillator model <Howto_drude>` in LAMMPS and polarizable models in LAMMPS
are discussed on the :doc:`Howto polarizable <Howto_polarizable>` doc
@ -300,5 +300,5 @@ The option defaults are zero = no.
.. _Jiang1:
**(Jiang)** Jiang, Hardy, Phillips, MacKerell, Schulten, and Roux, J
**(Jiang1)** Jiang, Hardy, Phillips, MacKerell, Schulten, and Roux, J
Phys Chem Lett, 2, 87-92 (2011).

View File

@ -51,7 +51,7 @@ the :doc:`run <run>` command. This fix is not invoked during :doc:`energy minim
Restrictions
""""""""""""
This fix is part of the MISC package. It is only enabled if LAMMPS
This fix is part of the EXTRA-FIX package. It is only enabled if LAMMPS
was built with that package. See the :doc:`Build package <Build_package>` page for more info.
Related commands

View File

@ -144,7 +144,7 @@ the :doc:`run <run>` command. This fix is not invoked during
Restrictions
""""""""""""
This fix is part of the MISC package. It is only enabled if
This fix is part of the EXTRA-FIX package. It is only enabled if
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` page for more info.
Related commands

View File

@ -74,14 +74,17 @@ atoms interact with each other via an *eam* potential, the surface atoms
interact with each other via a *lj/cut* potential, and the metal/surface
interaction is also computed via a *lj/cut* potential. The
*hybrid/overlay* style could be used as in the second example above,
where multiple potentials are superposed in an additive fashion to
where multiple potentials are superimposed in an additive fashion to
compute the interaction between atoms. In this example, using *lj/cut*
and *coul/long* together gives the same result as if the
*lj/cut/coul/long* potential were used by itself. In this case, it
would be more efficient to use the single combined potential, but in
general any combination of pair potentials can be used together to
produce an interaction that is not encoded in any single pair_style
file, e.g. adding Coulombic forces between granular particles.
file, e.g. adding Coulombic forces between granular particles. Another
limitation of using the *hybrid/overlay* variant is that it does not generate
*lj/cut* parameters for mixed atom types from a mixing rule due to
restrictions discussed below.
If the *hybrid/scaled* style is used instead of *hybrid/overlay*,
contributions from sub-styles are weighted by their scale factors, which
@ -150,10 +153,14 @@ with Tersoff, and the cross-interactions with Lennard-Jones:
pair_coeff * * tersoff 2 C.tersoff NULL C
pair_coeff 1 2 lj/cut 1.0 1.5
If pair coefficients are specified in the data file read via the
:doc:`read_data <read_data>` command, then the same rule applies.
E.g. "eam/alloy" or "lj/cut" must be added after the atom type, for
each line in the "Pair Coeffs" section, e.g.
It is not recommended to read pair coefficients for a hybrid style from a "Pair Coeffs"
or "PairIJ Coeffs" section of a data file via the :doc:`read_data <read_data>` command,
since those sections expect a fixed number of lines, either one line per atom type or
one line per pair of atom types, respectively. When reading from a data file, the
lines of the "Pair Coeffs" and "PairIJ Coeffs" sections are changed in the same way as the
*pair_coeff* command, i.e. the name of the pair style to which the parameters apply must
follow the atom type (or atom types), e.g.
.. parsed-literal::
@ -162,6 +169,11 @@ each line in the "Pair Coeffs" section, e.g.
1 lj/cut/coul/cut 1.0 1.0
...
PairIJ Coeffs
1 1 lj/cut/coul/cut 1.0 1.0
...
Note that the pair_coeff command for some potentials such as
:doc:`pair_style eam/alloy <pair_eam>` includes a mapping specification
of elements to all atom types, which in the hybrid case, can include
@ -208,12 +220,22 @@ examples above, or in the data file read by the :doc:`read_data
<read_data>`, or by mixing as described below. Also all sub-styles
must be used at least once in a :doc:`pair_coeff <pair_coeff>` command.
.. note::
.. warning::
LAMMPS never performs mixing of parameters from different sub-styles,
**even** if they use the same type of coefficients, e.g. contain
a Lennard-Jones potential variant. Those parameters must be provided
explicitly.
With hybrid pair styles the use of mixing to generate pair
coefficients is significantly limited compared to the individual pair
styles. LAMMPS **never** performs mixing of parameters from
different sub-styles, **even** if they use the same type of
coefficients, e.g. contain a Lennard-Jones potential variant. Those
parameters must be provided explicitly. Also for *hybrid/overlay*
and *hybrid/scaled* mixing is **only** performed for pairs of atom
types for which only a single pair style is assigned.
Thus it is strongly recommended to provide all mixed terms
explicitly. For non-hybrid styles those could be generated and
written out using the :doc:`write_coeff command <write_coeff>` and
then edited as needed to comply with the requirements for hybrid
styles as explained above.
If you want there to be no interactions between a particular pair of
atom types, you have 3 choices. You can assign the pair of atom types

View File

@ -26,23 +26,25 @@ Examples
Description
"""""""""""
The local density (LD) potential is a mean-field manybody potential, and, in some
sense,a generalization of embedded atom models (EAM). The name "local density
potential" arises from the fact that it assigns an energy to an atom depending
on the number of neighboring atoms of given type around it within a predefined
spherical volume (i.e., within a cutoff). The bottom-up coarse-graining (CG)
literature suggests that such potentials can be widely useful in capturing
effective multibody forces in a computationally efficient manner so as to
improve the quality of CG models of implicit solvation:ref:`(Sanyal1) <Sanyal1>` and
phase-segregation in liquid mixtures:ref:`(Sanyal2) <Sanyal2>`, and provide guidelines
to determine the extent of manybody correlations present in a CG
model.:ref:`(Rosenberger) <Rosenberger>` The LD potential in LAMMPS is primarily
intended to be used as a corrective potential over traditional pair potentials
in bottom-up CG models, i.e., as a hybrid pair style with
other explicit pair interaction terms (e.g., table spline, Lennard Jones, etc.).
Because the LD potential is not a pair potential per se, it is implemented
simply as a single auxiliary file with all specifications that will be read
upon initialization.
The local density (LD) potential is a mean-field manybody potential,
and, in some way, a generalization of embedded atom models (EAM). The
name "local density potential" arises from the fact that it assigns an
energy to an atom depending on the number of neighboring atoms of a
given type around it within a predefined spherical volume (i.e., within
the cutoff). The bottom-up coarse-graining (CG) literature suggests
that such potentials can be widely useful in capturing effective
multibody forces in a computationally efficient manner and thus improve
the quality of CG models of implicit solvation :ref:`(Sanyal1)
<Sanyal1>` and phase-segregation in liquid mixtures :ref:`(Sanyal2)
<Sanyal2>`, and provide guidelines to determine the extent of manybody
correlations present in a CG model :ref:`(Rosenberger) <Rosenberger>`.
The LD potential in LAMMPS is primarily intended to be used as a
corrective potential over traditional pair potentials in bottom-up CG
models via :doc:`hybrid/overlay pair style <pair_hybrid>` with other
explicit pair interaction terms (e.g., tabulated, Lennard-Jones, Morse,
etc.). Because the LD potential is not a pair potential per se, it is
implemented simply as a single auxiliary file with all specifications
that will be read upon initialization.
.. note::

View File

@ -71,21 +71,23 @@ The *mix* keyword affects pair coefficients for interactions between
atoms of type I and J, when I != J and the coefficients are not
explicitly set in the input script. Note that coefficients for I = J
must be set explicitly, either in the input script via the
:doc:`pair_coeff <pair_coeff>` command or in the "Pair Coeffs" section of the
:doc:`data file <read_data>`. For some pair styles it is not
:doc:`pair_coeff <pair_coeff>` command or in the "Pair Coeffs" or "PairIJ Coeffs"
sections of the :doc:`data file <read_data>`. For some pair styles it is not
necessary to specify coefficients when I != J, since a "mixing" rule
will create them from the I,I and J,J settings. The pair_modify
*mix* value determines what formulas are used to compute the mixed
coefficients. In each case, the cutoff distance is mixed the same way
as sigma.
Note that not all pair styles support mixing and some mix options
are not available for certain pair styles. Also, there are additional
restrictions when using :doc:`pair style hybrid or hybrid/overlay <pair_hybrid>`.
See the page for individual pair styles for those restrictions. Note also that the
:doc:`pair_coeff <pair_coeff>` command also can be used to directly set
coefficients for a specific I != J pairing, in which case no mixing is
performed.
Note that not all pair styles support mixing and some mix options are
not available for certain pair styles. Also, there are additional
restrictions when using :doc:`pair style hybrid or hybrid/overlay
<pair_hybrid>`. See the page for individual pair styles for those
restrictions. Note that the :doc:`pair_coeff <pair_coeff>` command
can also be used to directly set coefficients for a specific I != J
pairing, in which case no mixing is performed. If possible, LAMMPS will
print an informational message about how many of the mixed pair
coefficients were generated and which mixing rule was applied.
- mix *geometric*

View File

@ -1,4 +1,5 @@
.. index:: pair_style nm/cut
.. index:: pair_style nm/cut/split
.. index:: pair_style nm/cut/coul/cut
.. index:: pair_style nm/cut/coul/long
.. index:: pair_style nm/cut/omp
@ -10,6 +11,9 @@ pair_style nm/cut command
Accelerator Variants: *nm/cut/omp*
pair_style nm/cut/split command
===============================
pair_style nm/cut/coul/cut command
==================================
@ -27,13 +31,15 @@ Syntax
pair_style style args
* style = *nm/cut* or *nm/cut/coul/cut* or *nm/cut/coul/long*
* style = *nm/cut* or *nm/cut/split* or *nm/cut/coul/cut* or *nm/cut/coul/long*
* args = list of arguments for a particular style
.. parsed-literal::
*nm/cut* args = cutoff
cutoff = global cutoff for Pair interactions (distance units)
*nm/cut/split* args = cutoff
cutoff = global cutoff for Pair interactions (distance units)
*nm/cut/coul/cut* args = cutoff (cutoff2)
cutoff = global cutoff for Pair (and Coulombic if only 1 arg) (distance units)
cutoff2 = global cutoff for Coulombic (optional) (distance units)
@ -50,6 +56,10 @@ Examples
pair_coeff * * 0.01 5.4 8.0 7.0
pair_coeff 1 1 0.01 4.4 7.0 6.0
pair_style nm/cut/split 1.12246
pair_coeff 1 1 1.0 1.12246 12 6
pair_coeff * * 1.0 1.12246 11 6
pair_style nm/cut/coul/cut 12.0 15.0
pair_coeff * * 0.01 5.4 8.0 7.0
pair_coeff 1 1 0.01 4.4 7.0 6.0
@ -71,7 +81,15 @@ interaction has the following form:
E = \frac{E_0}{(n-m)} \left[ m \left(\frac{r_0}{r}\right)^n - n
\left(\frac{r_0}{r}\right)^m \right] \qquad r < r_c
where :math:`r_c` is the cutoff.
where :math:`r_c` is the cutoff and :math:`r_0` is the minimum of the
potential. Please note that this differs from the convention used for
other Lennard-Jones potentials in LAMMPS where :math:`\sigma` represents
the location where the energy is zero.
Style *nm/cut/split* applies the standard LJ (12-6) potential above
:math:`r_0 = 2^\frac{1}{6}\sigma`. Style *nm/cut/split* is employed in
polymer equilibration protocols that combine core-softening approaches
with topology-changing moves :ref:`(Dietz) <Dietz>`.
Style *nm/cut/coul/cut* adds a Coulombic pairwise interaction given by
@ -155,7 +173,6 @@ the :doc:`run_style respa <run_style>` command. They do not support the
Restrictions
""""""""""""
These pair styles are part of the EXTRA-PAIR package. They are only enabled if
LAMMPS was built with that package. See the
:doc:`Build package <Build_package>` page for more info.
@ -163,7 +180,7 @@ LAMMPS was built with that package. See the
Related commands
""""""""""""""""
:doc:`pair_coeff <pair_coeff>`
:doc:`pair_coeff <pair_coeff>`, :doc:`pair style lj/cut <pair_lj>`, :doc:`bond style fene/nm <bond_fene>`
Default
"""""""
@ -175,3 +192,8 @@ none
.. _Clarke:
**(Clarke)** Clarke and Smith, J Chem Phys, 84, 2290 (1986).
.. _Dietz:
**(Dietz)** J.D. Dietz, R.S. Hoy, "Facile equilibration of well-entangled
semi-flexible bead-spring polymer melts" arXiv:2109.11001

View File

@ -274,6 +274,7 @@ accelerated styles exist.
* :doc:`nm/cut <pair_nm>` - N-M potential
* :doc:`nm/cut/coul/cut <pair_nm>` - N-M potential with cutoff Coulomb
* :doc:`nm/cut/coul/long <pair_nm>` - N-M potential with long-range Coulomb
* :doc:`nm/cut/split <pair_nm>` - Split 12-6 Lennard-Jones and N-M potential
* :doc:`oxdna/coaxstk <pair_oxdna>` -
* :doc:`oxdna/excv <pair_oxdna>` -
* :doc:`oxdna/hbond <pair_oxdna>` -
@ -327,6 +328,7 @@ accelerated styles exist.
* :doc:`spin/neel <pair_spin_neel>` -
* :doc:`srp <pair_srp>` -
* :doc:`sw <pair_sw>` - Stillinger-Weber 3-body potential
* :doc:`sw/mod <pair_sw>` - modified Stillinger-Weber 3-body potential
* :doc:`table <pair_table>` - tabulated pair potential
* :doc:`table/rx <pair_table_rx>` -
* :doc:`tdpd <pair_mesodpd>` - tDPD particle interactions

View File

@ -3,18 +3,34 @@
.. index:: pair_style sw/intel
.. index:: pair_style sw/kk
.. index:: pair_style sw/omp
.. index:: pair_style sw/mod
.. index:: pair_style sw/mod/omp
pair_style sw command
=====================
Accelerator Variants: *sw/gpu*, *sw/intel*, *sw/kk*, *sw/omp*
pair_style sw/mod command
=========================
Accelerator Variants: *sw/mod/omp*
Syntax
""""""
.. code-block:: LAMMPS
pair_style sw
pair_style style keyword values
* style = *sw* or *sw/mod*
* keyword = *maxdelcs*
.. parsed-literal::
*maxdelcs* value = delta1 delta2 (optional)
delta1 = The minimum threshold for cosine of three-body angle
delta2 = The maximum threshold for cosine of three-body angle
Examples
""""""""
@ -25,6 +41,9 @@ Examples
pair_coeff * * si.sw Si
pair_coeff * * GaN.sw Ga N Ga
pair_style sw/mod maxdelcs 0.25 0.35
pair_coeff * * tmd.sw.mod Mo S S
Description
"""""""""""
@ -48,8 +67,52 @@ where :math:`\phi_2` is a two-body term and :math:`\phi_3` is a
three-body term. The summations in the formula are over all neighbors J
and K of atom I within a cutoff distance :math:`a \sigma`.
Only a single pair_coeff command is used with the *sw* style which
specifies a Stillinger-Weber potential file with parameters for all
The *sw/mod* style is designed for simulations of materials where
distinguishing between three-body angles is necessary, such as borophene
and transition metal dichalcogenide, which cannot be described
by the original code for the Stillinger-Weber potential.
For instance, there are several types of angles around each Mo atom in :math:`MoS_2`,
and some unnecessary angle types should be excluded in the three-body interaction.
Such exclusion may be realized by selecting proper angle types directly.
The exclusion of unnecessary angles is achieved here by the cut-off function (:math:`f_C(\delta)`),
which induces only minimum modifications for LAMMPS.
Validation, benchmark tests, and applications of the *sw/mod* style
can be found in :ref:`(Jiang2) <Jiang2>` and :ref:`(Jiang3) <Jiang3>`.
The *sw/mod* style computes the energy E of a system of atoms, whose potential
function is mostly the same as the Stillinger-Weber potential. The only modification
is in the three-body term, where the value of :math:`\delta = \cos \theta_{ijk} - \cos \theta_{0ijk}`
used in the original energy and force expression is scaled by a switching factor :math:`f_C(\delta)`:
.. math::
f_C(\delta) & = \left\{ \begin{array} {r@{\quad:\quad}l}
1 & \left| \delta \right| < \delta_1 \\
\frac{1}{2} + \frac{1}{2} \cos \left( \pi \frac{\left| \delta \right| - \delta_1}{\delta_2 - \delta_1} \right) &
\delta_1 < \left| \delta \right| < \delta_2 \\
0 & \left| \delta \right| > \delta_2
\end{array} \right. \\
This cut-off function decreases smoothly from 1 to 0 over the range :math:`[\delta_1, \delta_2]`.
This smoothly turns off the energy and force contributions for :math:`\left| \delta \right| > \delta_2`.
It is suggested that :math:`\delta_1` and :math:`\delta_2` be values around
:math:`0.5 \left| \cos \theta_1 - \cos \theta_2 \right|`, with
:math:`\theta_1` and :math:`\theta_2` as the different types of angles around an atom.
For borophene and transition metal dichalcogenide, :math:`\delta_1 = 0.25` and :math:`\delta_2 = 0.35`.
These values enable the cut-off function to exclude unnecessary angles in the three-body SW terms.
.. note::
The cut-off function is just to be used as a technique to exclude some unnecessary angles,
and it has no physical meaning. It should be noted that the force and potential are inconsistent
with each other in the decaying range of the cut-off function, as the angle dependence for the
cut-off function is not implemented in the force (first derivative of the potential).
However, the angle variation is much smaller than the given threshold value for actual simulations,
so the inconsistency between potential and force can be neglected in actual simulations.
Only a single pair_coeff command is used with the *sw* and *sw/mod* styles
which specifies a Stillinger-Weber potential file with parameters for all
needed elements. These are mapped to LAMMPS atom types by specifying
N additional arguments after the filename in the pair_coeff command,
where N is the number of LAMMPS atom types:
@ -213,10 +276,19 @@ Related commands
Default
"""""""
none
The default values for the *maxdelcs* setting of the *sw/mod* pair
style are *delta1* = 0.25 and *delta2* = 0.35.
----------
.. _Stillinger2:
**(Stillinger)** Stillinger and Weber, Phys Rev B, 31, 5262 (1985).
.. _Jiang2:
**(Jiang2)** J.-W. Jiang, Nanotechnology 26, 315706 (2015).
.. _Jiang3:
**(Jiang3)** J.-W. Jiang, Acta Mech. Solida. Sin 32, 17 (2019).

View File

@ -23,7 +23,7 @@ Syntax
pair_style style keywords values
* style = *tersoff* or *tersoff/table* or *tersoff/gpu* or *tersoff/omp* or *tersoff/table/omp*
* style = *tersoff* or *tersoff/table*
* keyword = *shift*
.. parsed-literal::

View File

@ -17,7 +17,7 @@ Syntax
pair_style style args
* style = *thole* or *lj/cut/thole/long* or *lj/cut/thole/long/omp*
* style = *thole* or *lj/cut/thole/long*
* args = list of arguments for a particular style
.. parsed-literal::
@ -25,7 +25,7 @@ Syntax
*thole* args = damp cutoff
damp = global damping parameter
cutoff = global cutoff (distance units)
*lj/cut/thole/long* or *lj/cut/thole/long/omp* args = damp cutoff (cutoff2)
*lj/cut/thole/long* args = damp cutoff (cutoff2)
damp = global damping parameter
cutoff = global cutoff for LJ (and Thole if only 1 arg) (distance units)
cutoff2 = global cutoff for Thole (optional) (distance units)

View File

@ -22,13 +22,13 @@ Syntax
pair_style style args
* style = *vashishta* or *vashishta/table* or *vashishta/omp* or *vashishta/table/omp*
* style = *vashishta* or *vashishta/table*
* args = list of arguments for a particular style
.. parsed-literal::
*vashishta* or *vashishta/omp* args = none
*vashishta/table* or *vashishta/table/omp* args = Ntable cutinner
*vashishta* args = none
*vashishta/table* args = Ntable cutinner
Ntable = # of tabulation points
cutinner = tabulate from cutinner to cutoff

View File

@ -98,8 +98,7 @@ command, after the dump snapshot is read.
----------
If the dump filename specified as *file* ends with ".gz", the dump
file is read in gzipped format. You cannot (yet) read a dump file
that was written in binary format with a ".bin" suffix.
file is read in gzipped format.
You can read dump files that were written (in parallel) to multiple
files via the "%" wild-card character in the dump file name. If any
@ -115,8 +114,8 @@ to tell LAMMPS how many parallel files exist, via its specified
The format of the dump file is selected through the *format* keyword.
If specified, it must be the last keyword used, since all remaining
arguments are passed on to the dump reader. The *native* format is
for native LAMMPS dump files, written with a :doc:`dump atom <dump>` or
:doc:`dump custom <dump>` command. The *xyz* format is for generic XYZ
for native LAMMPS dump files, written with a :doc:`dump atom <dump>`
or :doc:`dump custom <dump>` command. The *xyz* format is for generic XYZ
formatted dump files. These formats take no additional values.
The *molfile* format supports reading data through using the `VMD <vmd_>`_
@ -370,8 +369,6 @@ needed to generate absolute, unscaled coordinates.
Restrictions
""""""""""""
The *native* dump file reader does not support binary .bin dump files.
To read gzipped dump files, you must compile LAMMPS with the
-DLAMMPS_GZIP option. See the :doc:`Build settings <Build_settings>`
doc page for details.

View File

@ -308,6 +308,7 @@ boolean
boostostat
boostostatting
Boresch
borophene
Botero
Botu
Bouguet
@ -688,8 +689,10 @@ diagonalizers
diagonalizing
Diallo
diblock
dichalcogenide
Dickel
diel
Dietz
differentiable
diffusively
diffusivity
@ -1309,6 +1312,7 @@ hotpink
Houlle
howto
Howto
Hoy
Hoyt
Hs
hstyle
@ -2390,6 +2394,7 @@ ohenrich
ok
Okabe
Okamoto
O'Hearn
O'Keefe
OKeefe
oldlace
@ -3078,6 +3083,7 @@ snav
Snodin
Sodani
Soderlind
Solida
solvated
solvation
someuser

View File

@ -1,6 +1,8 @@
# NOTE: This script can be modified for different pair styles
# See in.elastic for more info.
# we must undefine any fix ave/* fix before using reset_timestep
# is_defined(fix,avp) evaluates to 1 only when a fix with ID "avp" exists,
# so the unfix is skipped on the first pass when "avp" has not been created yet.
# NOTE: the immediate variable $(...) must close both the is_defined() call
# and the $( ... ) wrapper, hence the two closing parentheses.
if "$(is_defined(fix,avp))" then "unfix avp"
reset_timestep 0
# Choose potential

View File

@ -1,4 +1,4 @@
# local density potentials: (B,B), (W,W), (B,W), (W,B)
# local density potentials: (B,B), (W,W), (B,W), (W,B) UNITS: real
4 500

View File

@ -1,4 +1,4 @@
# UNITS: real
PairBB
N 500 R 2.00000e-02 1.32500e+01

View File

@ -11,7 +11,7 @@
# Initialize simulation box
dimension 3
boundary p p p
boundary p p p
units real
atom_style molecular
@ -32,7 +32,7 @@ pair_coeff * * local/density benzene_water.localdensity.table
fix recentering all recenter 0.0 0.0 0.0 units box
# Thermostat & time integration
timestep 2.0
timestep 2.0
thermo 100
thermo_style custom temp ke pe etotal ebond eangle edihed evdwl
@ -49,14 +49,14 @@ run 5000
# Turn off recentering during production phase
unfix recentering
reset_timestep 0
# Setup trajectory output
dump myDump all custom 100 benzene_water.lammpstrj.gz id type x y z element
dump_modify myDump element B W
dump_modify myDump sort id
#dump myDump all custom 100 benzene_water.lammpstrj.gz id type x y z element
#dump_modify myDump element B W
#dump_modify myDump sort id
# Production (for realistic results, run for 10000000 steps)
reset_timestep 0
run 1000
run 1000

View File

@ -1,267 +0,0 @@
LAMMPS (7 Aug 2019)
# LAMMPS input file for 26.5% benzene mole fraction solution
# with 380 benzene and 1000 water molecules,
# using all possible local density potentials
# between benzene and water
#
# Author: Tanmoy Sanyal, Shell Group, UC Santa Barbara
#
# Refer: Sanyal and Shell, JPC-B, 2018, 122 (21), 5678-5693
# Initialize simulation box
dimension 3
boundary p p p
units real
atom_style molecular
# Set potential styles
pair_style hybrid/overlay table spline 500 local/density
# Read molecule data and set initial velocities
read_data benzene_water.data
orthogonal box = (-12.865 -12.865 -64.829) to (12.865 12.865 64.829)
1 by 1 by 8 MPI processor grid
reading atoms ...
1380 atoms
0 = max # of 1-2 neighbors
0 = max # of 1-3 neighbors
0 = max # of 1-4 neighbors
1 = max # of special neighbors
special bonds CPU = 0.000566959 secs
read_data CPU = 0.00661397 secs
velocity all create 3.0000e+02 16611 rot yes dist gaussian
# Assign potentials
pair_coeff 1 1 table benzene_water.pair.table PairBB
WARNING: 33 of 500 force values in table are inconsistent with -dE/dr.
Should only be flagged at inflection points (../pair_table.cpp:483)
WARNING: 150 of 500 distance values in table with relative error
over 1e-06 to re-computed values (../pair_table.cpp:492)
pair_coeff 1 2 table benzene_water.pair.table PairWW
WARNING: 61 of 500 force values in table are inconsistent with -dE/dr.
Should only be flagged at inflection points (../pair_table.cpp:483)
WARNING: 90 of 500 distance values in table with relative error
over 1e-06 to re-computed values (../pair_table.cpp:492)
pair_coeff 2 2 table benzene_water.pair.table PairBW
WARNING: 108 of 500 force values in table are inconsistent with -dE/dr.
Should only be flagged at inflection points (../pair_table.cpp:483)
WARNING: 135 of 500 distance values in table with relative error
over 1e-06 to re-computed values (../pair_table.cpp:492)
pair_coeff * * local/density benzene_water.localdensity.table
# Recentering during minimization and equilibration
fix recentering all recenter 0.0 0.0 0.0 units box
# Thermostat & time integration
timestep 2.0
thermo 100
thermo_style custom temp ke pe etotal ebond eangle edihed evdwl
# Minimization
minimize 1.e-4 0.0 10000 10000
WARNING: Using 'neigh_modify every 1 delay 0 check yes' setting during minimization (../min.cpp:168)
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 15.25
ghost atom cutoff = 15.25
binsize = 7.625, bins = 4 4 18
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair table, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/3d/newton
bin: standard
(2) pair local/density, perpetual, copy from (1)
attributes: half, newton on
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 8.061 | 8.32 | 8.674 Mbytes
Temp KinEng PotEng TotEng E_bond E_angle E_dihed E_vdwl
300 1233.1611 4162.3053 5395.4665 0 0 0 4162.3053
300 1233.1611 2275.526 3508.6871 0 0 0 2275.526
Loop time of 0.352822 on 8 procs for 40 steps with 1380 atoms
71.3% CPU use with 8 MPI tasks x no OpenMP threads
Minimization stats:
Stopping criterion = linesearch alpha is zero
Energy initial, next-to-last, final =
4162.30533361 2208.86525108 2275.52597861
Force two-norm initial, final = 259.364 69.3915
Force max component initial, final = 22.2077 8.31436
Final line search alpha, max atom move = 2.90022e-12 2.41135e-11
Iterations, force evaluations = 40 110
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.053192 | 0.23903 | 0.32779 | 17.2 | 67.75
Bond | 9.0599e-06 | 1.6302e-05 | 2.5272e-05 | 0.0 | 0.00
Neigh | 0.00044513 | 0.0023614 | 0.0063851 | 5.1 | 0.67
Comm | 0.015469 | 0.090432 | 0.20295 | 20.0 | 25.63
Output | 0 | 0 | 0 | 0.0 | 0.00
Modify | 0 | 0 | 0 | 0.0 | 0.00
Other | | 0.02098 | | | 5.95
Nlocal: 172.5 ave 348 max 72 min
Histogram: 5 0 0 0 0 0 0 0 1 2
Nghost: 2193.62 ave 4352 max 932 min
Histogram: 3 0 0 2 0 0 2 0 0 1
Neighs: 9700.5 ave 20535 max 3685 min
Histogram: 5 0 0 0 0 0 0 1 0 2
Total # of neighbors = 77604
Ave neighs/atom = 56.2348
Ave special neighs/atom = 0
Neighbor list builds = 2
Dangerous builds = 0
# Set up integration parameters
fix timeintegration all nve
fix thermostat all langevin 3.0000e+02 3.0000e+02 1.0000e+02 81890
# Equilibration (for realistic results, run for 5000000 steps)
reset_timestep 0
run 5000
WARNING: Fix recenter should come after all other integration fixes (../fix_recenter.cpp:131)
Per MPI rank memory allocation (min/avg/max) = 6.936 | 7.195 | 7.552 Mbytes
Temp KinEng PotEng TotEng E_bond E_angle E_dihed E_vdwl
300 1233.1611 2866.9109 4100.0721 0 0 0 2866.9109
273.33541 1123.5553 3983.2007 5106.756 0 0 0 3983.2007
293.68078 1207.1857 3319.6601 4526.8458 0 0 0 3319.6601
314.21462 1291.5908 3389.2178 4680.8086 0 0 0 3389.2178
323.77563 1330.8917 3332.9828 4663.8745 0 0 0 3332.9828
302.5902 1243.8082 3461.7692 4705.5774 0 0 0 3461.7692
295.39324 1214.2249 3411.5727 4625.7976 0 0 0 3411.5727
320.52341 1317.5234 3453.1931 4770.7164 0 0 0 3453.1931
312.00777 1282.5195 3403.3443 4685.8638 0 0 0 3403.3443
307.96774 1265.9128 3429.7809 4695.6937 0 0 0 3429.7809
294.75922 1211.6187 3388.8404 4600.4591 0 0 0 3388.8404
311.24567 1279.3869 3514.9603 4794.3472 0 0 0 3514.9603
306.6152 1260.3531 3447.2011 4707.5542 0 0 0 3447.2011
305.23306 1254.6718 3375.5092 4630.181 0 0 0 3375.5092
321.62889 1322.0675 3460.2581 4782.3256 0 0 0 3460.2581
316.37725 1300.4804 3437.0312 4737.5116 0 0 0 3437.0312
322.90522 1327.3139 3389.1262 4716.44 0 0 0 3389.1262
307.57893 1264.3146 3359.8491 4624.1637 0 0 0 3359.8491
302.22607 1242.3115 3406.1711 4648.4826 0 0 0 3406.1711
302.73997 1244.4239 3220.2582 4464.6821 0 0 0 3220.2582
303.66194 1248.2137 3318.4629 4566.6765 0 0 0 3318.4629
308.73862 1269.0815 3369.5894 4638.671 0 0 0 3369.5894
315.60294 1297.2976 3411.2405 4708.5381 0 0 0 3411.2405
310.0113 1274.3129 3360.1054 4634.4183 0 0 0 3360.1054
302.36229 1242.8714 3326.9845 4569.8559 0 0 0 3326.9845
317.78659 1306.2735 3355.4976 4661.7711 0 0 0 3355.4976
302.50479 1243.4571 3317.6846 4561.1417 0 0 0 3317.6846
304.29249 1250.8056 3423.5068 4674.3124 0 0 0 3423.5068
305.99948 1257.8222 3432.9395 4690.7617 0 0 0 3432.9395
309.93363 1273.9937 3393.657 4667.6506 0 0 0 3393.657
316.14884 1299.5415 3463.0636 4762.6051 0 0 0 3463.0636
300.38817 1234.7567 3309.2495 4544.0062 0 0 0 3309.2495
311.05735 1278.6128 3304.4418 4583.0546 0 0 0 3304.4418
311.11872 1278.865 3291.1891 4570.0542 0 0 0 3291.1891
315.74338 1297.8749 3341.3063 4639.1812 0 0 0 3341.3063
297.5658 1223.1552 3316.3862 4539.5414 0 0 0 3316.3862
311.79033 1281.6257 3357.4556 4639.0813 0 0 0 3357.4556
310.93666 1278.1167 3414.7694 4692.8861 0 0 0 3414.7694
307.37298 1263.468 3337.3889 4600.8569 0 0 0 3337.3889
298.84185 1228.4005 3329.6173 4558.0178 0 0 0 3329.6173
310.54684 1276.5143 3351.0852 4627.5995 0 0 0 3351.0852
300.0871 1233.5191 3302.2315 4535.7506 0 0 0 3302.2315
304.69078 1252.4427 3324.2508 4576.6935 0 0 0 3324.2508
313.50714 1288.6827 3330.4088 4619.0915 0 0 0 3330.4088
329.80018 1355.6559 3301.86 4657.5159 0 0 0 3301.86
304.57609 1251.9713 3365.2938 4617.2652 0 0 0 3365.2938
308.73584 1269.0701 3344.4155 4613.4856 0 0 0 3344.4155
306.90951 1261.5629 3304.4698 4566.0327 0 0 0 3304.4698
308.85761 1269.5707 3392.1511 4661.7218 0 0 0 3392.1511
302.78788 1244.6208 3317.0849 4561.7057 0 0 0 3317.0849
321.68092 1322.2813 3321.5755 4643.8568 0 0 0 3321.5755
Loop time of 16.3061 on 8 procs for 5000 steps with 1380 atoms
Performance: 52.986 ns/day, 0.453 hours/ns, 306.634 timesteps/s
69.6% CPU use with 8 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 2.1872 | 10.542 | 14.607 | 116.7 | 64.65
Bond | 0.00044084 | 0.00069669 | 0.00095081 | 0.0 | 0.00
Neigh | 0.026948 | 0.15225 | 0.44344 | 42.0 | 0.93
Comm | 0.63452 | 4.2953 | 9.49 | 133.9 | 26.34
Output | 0.0016391 | 0.012378 | 0.050919 | 13.9 | 0.08
Modify | 0.45894 | 1.2107 | 4.4629 | 116.4 | 7.42
Other | | 0.09292 | | | 0.57
Nlocal: 172.5 ave 380 max 70 min
Histogram: 5 0 0 0 0 0 0 1 1 1
Nghost: 2213 ave 4440 max 903 min
Histogram: 3 0 0 2 0 0 2 0 0 1
Neighs: 10042.5 ave 24051 max 3500 min
Histogram: 5 0 0 0 0 0 0 1 1 1
Total # of neighbors = 80340
Ave neighs/atom = 58.2174
Ave special neighs/atom = 0
Neighbor list builds = 123
Dangerous builds = 1
# Turn off recentering during production phase
unfix recentering
# Setup trajectory output
dump myDump all custom 100 benzene_water.lammpstrj.gz id type x y z element
dump_modify myDump element B W
dump_modify myDump sort id
# Production (for realistic results, run for 10000000 steps)
reset_timestep 0
run 1000
Per MPI rank memory allocation (min/avg/max) = 8.232 | 8.492 | 8.851 Mbytes
Temp KinEng PotEng TotEng E_bond E_angle E_dihed E_vdwl
321.68092 1322.2813 3784.0834 5106.3647 0 0 0 3784.0834
310.59763 1276.7231 3318.3283 4595.0513 0 0 0 3318.3283
303.39445 1247.1141 3324.1191 4571.2332 0 0 0 3324.1191
311.37275 1279.9092 3305.0901 4584.9993 0 0 0 3305.0901
311.29071 1279.572 3248.216 4527.788 0 0 0 3248.216
314.53456 1292.906 3283.4563 4576.3623 0 0 0 3283.4563
316.52595 1301.0916 3258.9171 4560.0087 0 0 0 3258.9171
318.92447 1310.9509 3235.6256 4546.5765 0 0 0 3235.6256
311.79212 1281.6331 3308.099 4589.7321 0 0 0 3308.099
305.52477 1255.8709 3267.6907 4523.5616 0 0 0 3267.6907
301.07457 1237.5782 3206.3997 4443.9779 0 0 0 3206.3997
Loop time of 4.44139 on 8 procs for 1000 steps with 1380 atoms
Performance: 38.907 ns/day, 0.617 hours/ns, 225.155 timesteps/s
60.8% CPU use with 8 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.656 | 2.5078 | 3.5775 | 57.7 | 56.46
Bond | 0.00013375 | 0.0001854 | 0.0002377 | 0.0 | 0.00
Neigh | 0.0048757 | 0.029188 | 0.090432 | 18.9 | 0.66
Comm | 0.51836 | 1.4427 | 2.6285 | 56.9 | 32.48
Output | 0.083084 | 0.089199 | 0.10333 | 2.3 | 2.01
Modify | 0.0087376 | 0.019705 | 0.038437 | 8.4 | 0.44
Other | | 0.3526 | | | 7.94
Nlocal: 172.5 ave 388 max 69 min
Histogram: 5 0 0 0 0 0 0 2 0 1
Nghost: 2207.88 ave 4429 max 896 min
Histogram: 3 0 0 2 0 0 2 0 0 1
Neighs: 10094.1 ave 24847 max 3403 min
Histogram: 5 0 0 0 0 0 1 1 0 1
Total # of neighbors = 80753
Ave neighs/atom = 58.5167
Ave special neighs/atom = 0
Neighbor list builds = 23
Dangerous builds = 0
Total wall time: 0:00:21

View File

@ -0,0 +1,300 @@
LAMMPS (27 Oct 2021)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
using 1 OpenMP thread(s) per MPI task
# LAMMPS input file for 26.5% benzene mole fraction solution
# with 380 benzene and 1000 water molecules,
# using all possible local density potentials
# between benzene and water
#
# Author: Tanmoy Sanyal, Shell Group, UC Santa Barbara
#
# Refer: Sanyal and Shell, JPC-B, 2018, 122 (21), 5678-5693
# Initialize simulation box
dimension 3
boundary p p p
units real
atom_style molecular
# Set potential styles
pair_style hybrid/overlay table spline 500 local/density
# Read molecule data and set initial velocities
read_data benzene_water.data
Reading data file ...
orthogonal box = (-12.865000 -12.865000 -64.829000) to (12.865000 12.865000 64.829000)
1 by 1 by 1 MPI processor grid
reading atoms ...
1380 atoms
Finding 1-2 1-3 1-4 neighbors ...
special bond factors lj: 0 0 0
special bond factors coul: 0 0 0
0 = max # of 1-2 neighbors
0 = max # of 1-3 neighbors
0 = max # of 1-4 neighbors
1 = max # of special neighbors
special bonds CPU = 0.000 seconds
read_data CPU = 0.006 seconds
velocity all create 3.0000e+02 16611 rot yes dist gaussian
# Assign potentials
pair_coeff 1 1 table benzene_water.pair.table PairBB
WARNING: 33 of 500 force values in table PairBB are inconsistent with -dE/dr.
WARNING: Should only be flagged at inflection points (src/pair_table.cpp:465)
WARNING: 150 of 500 distance values in table 1e-06 with relative error
WARNING: over PairBB to re-computed values (src/pair_table.cpp:473)
pair_coeff 1 2 table benzene_water.pair.table PairWW
WARNING: 61 of 500 force values in table PairWW are inconsistent with -dE/dr.
WARNING: Should only be flagged at inflection points (src/pair_table.cpp:465)
WARNING: 90 of 500 distance values in table 1e-06 with relative error
WARNING: over PairWW to re-computed values (src/pair_table.cpp:473)
pair_coeff 2 2 table benzene_water.pair.table PairBW
WARNING: 108 of 500 force values in table PairBW are inconsistent with -dE/dr.
WARNING: Should only be flagged at inflection points (src/pair_table.cpp:465)
WARNING: 135 of 500 distance values in table 1e-06 with relative error
WARNING: over PairBW to re-computed values (src/pair_table.cpp:473)
pair_coeff * * local/density benzene_water.localdensity.table
# Recentering during minimization and equilibration
fix recentering all recenter 0.0 0.0 0.0 units box
# Thermostat & time integration
timestep 2.0
thermo 100
thermo_style custom temp ke pe etotal ebond eangle edihed evdwl
# Minimization
minimize 1.e-4 0.0 10000 10000
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- pair_style local/density command:
@Article{Sanyal16,
author = {T.Sanyal and M.Scott Shell},
title = {Coarse-grained models using local-density potentials optimized with the relative entropy: Application to implicit solvation},
journal = {J.~Chem.~Phys.},
year = 2016,
DOI = doi.org/10.1063/1.4958629}
@Article{Sanyal18,
author = {T.Sanyal and M.Scott Shell},
title = {Transferable coarse-grained models of liquid-liquid equilibrium using local density potentials optimized with the relative entropy},
journal = {J.~Phys.~Chem. B},
year = 2018,
DOI = doi.org/10.1021/acs.jpcb.7b12446}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
WARNING: Using 'neigh_modify every 1 delay 0 check yes' setting during minimization (src/min.cpp:187)
generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 15.25
ghost atom cutoff = 15.25
binsize = 7.625, bins = 4 4 18
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair table, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/3d
bin: standard
(2) pair local/density, perpetual, copy from (1)
attributes: half, newton on
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 8.754 | 8.754 | 8.754 Mbytes
Temp KinEng PotEng TotEng E_bond E_angle E_dihed E_vdwl
300 1233.1611 2374.6749 3607.836 0 0 0 2374.6749
300 1233.1611 985.54829 2218.7094 0 0 0 985.54829
300 1233.1611 962.66036 2195.8215 0 0 0 962.66036
Loop time of 0.812343 on 1 procs for 134 steps with 1380 atoms
99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
Minimization stats:
Stopping criterion = energy tolerance
Energy initial, next-to-last, final =
2374.67491482358 962.664796664787 962.660357218268
Force two-norm initial, final = 263.77519 15.741017
Force max component initial, final = 22.412654 7.9360139
Final line search alpha, max atom move = 0.014975513 0.11884588
Iterations, force evaluations = 134 240
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.78539 | 0.78539 | 0.78539 | 0.0 | 96.68
Bond | 2.0149e-05 | 2.0149e-05 | 2.0149e-05 | 0.0 | 0.00
Neigh | 0.016759 | 0.016759 | 0.016759 | 0.0 | 2.06
Comm | 0.0045 | 0.0045 | 0.0045 | 0.0 | 0.55
Output | 2.9402e-05 | 2.9402e-05 | 2.9402e-05 | 0.0 | 0.00
Modify | 0 | 0 | 0 | 0.0 | 0.00
Other | | 0.005647 | | | 0.70
Nlocal: 1380.00 ave 1380 max 1380 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 5832.00 ave 5832 max 5832 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 78165.0 ave 78165 max 78165 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 78165
Ave neighs/atom = 56.641304
Ave special neighs/atom = 0.0000000
Neighbor list builds = 5
Dangerous builds = 0
# Set up integration parameters
fix timeintegration all nve
fix thermostat all langevin 3.0000e+02 3.0000e+02 1.0000e+02 81890
# Equilibration (for realistic results, run for 5000000 steps)
reset_timestep 0
run 5000
generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
WARNING: Fix recenter should come after all other integration fixes (src/fix_recenter.cpp:133)
Per MPI rank memory allocation (min/avg/max) = 7.629 | 7.629 | 7.629 Mbytes
Temp KinEng PotEng TotEng E_bond E_angle E_dihed E_vdwl
300 1233.1611 962.66036 2195.8215 0 0 0 962.66036
253.1913 1040.7522 1803.711 2844.4633 0 0 0 1803.711
290.31049 1193.332 2059.0637 3252.3958 0 0 0 2059.0637
299.30778 1230.3157 2140.226 3370.5417 0 0 0 2140.226
309.81524 1273.507 2178.3782 3451.8853 0 0 0 2178.3782
299.79526 1232.3195 2229.9248 3462.2444 0 0 0 2229.9248
299.24909 1230.0745 2260.7129 3490.7874 0 0 0 2260.7129
299.5898 1231.475 2244.2384 3475.7134 0 0 0 2244.2384
297.81223 1224.1682 2320.27 3544.4382 0 0 0 2320.27
301.53975 1239.4903 2277.0431 3516.5334 0 0 0 2277.0431
292.00572 1200.3003 2292.3073 3492.6076 0 0 0 2292.3073
309.19709 1270.9661 2303.6055 3574.5716 0 0 0 2303.6055
297.54933 1223.0876 2304.127 3527.2146 0 0 0 2304.127
303.48106 1247.4702 2303.5673 3551.0375 0 0 0 2303.5673
296.46047 1218.6118 2256.1591 3474.7709 0 0 0 2256.1591
299.4835 1231.038 2280.0452 3511.0832 0 0 0 2280.0452
306.25958 1258.8914 2307.9795 3566.8709 0 0 0 2307.9795
304.67335 1252.3711 2284.8252 3537.1963 0 0 0 2284.8252
298.33637 1226.3227 2289.8499 3516.1726 0 0 0 2289.8499
303.1338 1246.0427 2342.2148 3588.2575 0 0 0 2342.2148
305.86051 1257.251 2341.0106 3598.2616 0 0 0 2341.0106
297.75418 1223.9296 2303.5613 3527.4909 0 0 0 2303.5613
296.79348 1219.9806 2327.5207 3547.5013 0 0 0 2327.5207
307.25403 1262.9791 2288.4219 3551.401 0 0 0 2288.4219
301.26976 1238.3805 2291.2465 3529.627 0 0 0 2291.2465
297.17249 1221.5385 2283.3926 3504.9311 0 0 0 2283.3926
313.99072 1290.6705 2293.9661 3584.6366 0 0 0 2293.9661
301.70804 1240.1821 2331.1694 3571.3515 0 0 0 2331.1694
300.62599 1235.7343 2325.4367 3561.171 0 0 0 2325.4367
292.13495 1200.8316 2315.631 3516.4626 0 0 0 2315.631
313.9981 1290.7008 2286.0536 3576.7545 0 0 0 2286.0536
300.25311 1234.2015 2324.2379 3558.4394 0 0 0 2324.2379
309.3746 1271.6958 2322.2298 3593.9256 0 0 0 2322.2298
300.23041 1234.1082 2332.7521 3566.8603 0 0 0 2332.7521
302.97054 1245.3716 2303.1689 3548.5405 0 0 0 2303.1689
294.77155 1211.6694 2334.5087 3546.1781 0 0 0 2334.5087
296.81476 1220.0681 2322.5932 3542.6613 0 0 0 2322.5932
301.83238 1240.6932 2345.4841 3586.1773 0 0 0 2345.4841
295.0399 1212.7724 2312.3889 3525.1614 0 0 0 2312.3889
300.73565 1236.185 2338.8384 3575.0235 0 0 0 2338.8384
303.02264 1245.5858 2310.0868 3555.6726 0 0 0 2310.0868
302.86404 1244.9339 2332.2001 3577.134 0 0 0 2332.2001
293.77916 1207.5901 2293.2799 3500.8701 0 0 0 2293.2799
299.30072 1230.2867 2317.5065 3547.7933 0 0 0 2317.5065
311.05029 1278.5837 2311.0476 3589.6313 0 0 0 2311.0476
293.25646 1205.4416 2314.7398 3520.1814 0 0 0 2314.7398
310.49018 1276.2814 2337.4909 3613.7723 0 0 0 2337.4909
302.37336 1242.9169 2340.3197 3583.2366 0 0 0 2340.3197
297.06862 1221.1116 2323.9136 3545.0252 0 0 0 2323.9136
300.54817 1235.4144 2315.2405 3550.6549 0 0 0 2315.2405
309.10643 1270.5934 2333.1848 3603.7783 0 0 0 2333.1848
Loop time of 15.2696 on 1 procs for 5000 steps with 1380 atoms
Performance: 56.583 ns/day, 0.424 hours/ns, 327.447 timesteps/s
99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 14.432 | 14.432 | 14.432 | 0.0 | 94.51
Bond | 0.00032375 | 0.00032375 | 0.00032375 | 0.0 | 0.00
Neigh | 0.41541 | 0.41541 | 0.41541 | 0.0 | 2.72
Comm | 0.0975 | 0.0975 | 0.0975 | 0.0 | 0.64
Output | 0.0013044 | 0.0013044 | 0.0013044 | 0.0 | 0.01
Modify | 0.30336 | 0.30336 | 0.30336 | 0.0 | 1.99
Other | | 0.01973 | | | 0.13
Nlocal: 1380.00 ave 1380 max 1380 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 5843.00 ave 5843 max 5843 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 76949.0 ave 76949 max 76949 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 76949
Ave neighs/atom = 55.760145
Ave special neighs/atom = 0.0000000
Neighbor list builds = 121
Dangerous builds = 1
# Turn off recentering during production phase
unfix recentering
# Setup trajectory output
dump myDump all custom 100 benzene_water.lammpstrj.gz id type x y z element
dump_modify myDump element B W
dump_modify myDump sort id
# Production (for realistic results, run for 10000000 steps)
reset_timestep 0
run 1000
generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Per MPI rank memory allocation (min/avg/max) = 9.022 | 9.022 | 9.022 Mbytes
Temp KinEng PotEng TotEng E_bond E_angle E_dihed E_vdwl
309.10643 1270.5934 2333.1848 3603.7783 0 0 0 2333.1848
300.84572 1236.6375 2331.3493 3567.9868 0 0 0 2331.3493
300.90599 1236.8852 2337.6775 3574.5627 0 0 0 2337.6775
302.77895 1244.5841 2341.7778 3586.362 0 0 0 2341.7778
291.66639 1198.9055 2320.3512 3519.2567 0 0 0 2320.3512
298.7003 1227.8187 2292.8195 3520.6382 0 0 0 2292.8195
301.11163 1237.7305 2310.017 3547.7475 0 0 0 2310.017
305.22515 1254.6393 2315.1355 3569.7748 0 0 0 2315.1355
295.15921 1213.2629 2310.184 3523.4468 0 0 0 2310.184
299.2024 1229.8826 2332.2118 3562.0943 0 0 0 2332.2118
302.80078 1244.6738 2320.3763 3565.0502 0 0 0 2320.3763
Loop time of 3.07208 on 1 procs for 1000 steps with 1380 atoms
Performance: 56.249 ns/day, 0.427 hours/ns, 325.512 timesteps/s
99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 2.8993 | 2.8993 | 2.8993 | 0.0 | 94.37
Bond | 6.5327e-05 | 6.5327e-05 | 6.5327e-05 | 0.0 | 0.00
Neigh | 0.083502 | 0.083502 | 0.083502 | 0.0 | 2.72
Comm | 0.019967 | 0.019967 | 0.019967 | 0.0 | 0.65
Output | 0.012268 | 0.012268 | 0.012268 | 0.0 | 0.40
Modify | 0.052801 | 0.052801 | 0.052801 | 0.0 | 1.72
Other | | 0.004203 | | | 0.14
Nlocal: 1380.00 ave 1380 max 1380 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 5860.00 ave 5860 max 5860 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 77055.0 ave 77055 max 77055 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 77055
Ave neighs/atom = 55.836957
Ave special neighs/atom = 0.0000000
Neighbor list builds = 24
Dangerous builds = 0
Total wall time: 0:00:19

View File

@ -0,0 +1,299 @@
LAMMPS (27 Oct 2021)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
using 1 OpenMP thread(s) per MPI task
# LAMMPS input file for 26.5% benzene mole fraction solution
# with 380 benzene and 1000 water molecules,
# using all possible local density potentials
# between benzene and water
#
# Author: Tanmoy Sanyal, Shell Group, UC Santa Barbara
#
# Refer: Sanyal and Shell, JPC-B, 2018, 122 (21), 5678-5693
# Initialize simulation box
dimension 3
boundary p p p
units real
atom_style molecular
# Set potential styles
pair_style hybrid/overlay table spline 500 local/density
# Read molecule data and set initial velocities
read_data benzene_water.data
Reading data file ...
orthogonal box = (-12.865000 -12.865000 -64.829000) to (12.865000 12.865000 64.829000)
1 by 1 by 4 MPI processor grid
reading atoms ...
1380 atoms
Finding 1-2 1-3 1-4 neighbors ...
special bond factors lj: 0 0 0
special bond factors coul: 0 0 0
0 = max # of 1-2 neighbors
0 = max # of 1-3 neighbors
0 = max # of 1-4 neighbors
1 = max # of special neighbors
special bonds CPU = 0.000 seconds
read_data CPU = 0.007 seconds
velocity all create 3.0000e+02 16611 rot yes dist gaussian
# Assign potentials
pair_coeff 1 1 table benzene_water.pair.table PairBB
WARNING: 33 of 500 force values in table PairBB are inconsistent with -dE/dr.
WARNING: Should only be flagged at inflection points (src/pair_table.cpp:465)
WARNING: 150 of 500 distance values in table 1e-06 with relative error
WARNING: over PairBB to re-computed values (src/pair_table.cpp:473)
pair_coeff 1 2 table benzene_water.pair.table PairWW
WARNING: 61 of 500 force values in table PairWW are inconsistent with -dE/dr.
WARNING: Should only be flagged at inflection points (src/pair_table.cpp:465)
WARNING: 90 of 500 distance values in table 1e-06 with relative error
WARNING: over PairWW to re-computed values (src/pair_table.cpp:473)
pair_coeff 2 2 table benzene_water.pair.table PairBW
WARNING: 108 of 500 force values in table PairBW are inconsistent with -dE/dr.
WARNING: Should only be flagged at inflection points (src/pair_table.cpp:465)
WARNING: 135 of 500 distance values in table 1e-06 with relative error
WARNING: over PairBW to re-computed values (src/pair_table.cpp:473)
pair_coeff * * local/density benzene_water.localdensity.table
# Recentering during minimization and equilibration
fix recentering all recenter 0.0 0.0 0.0 units box
# Thermostat & time integration
timestep 2.0
thermo 100
thermo_style custom temp ke pe etotal ebond eangle edihed evdwl
# Minimization
minimize 1.e-4 0.0 10000 10000
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- pair_style local/density command:
@Article{Sanyal16,
author = {T.Sanyal and M.Scott Shell},
title = {Coarse-grained models using local-density potentials optimized with the relative entropy: Application to implicit solvation},
journal = {J.~Chem.~Phys.},
year = 2016,
DOI = doi.org/10.1063/1.4958629}
@Article{Sanyal18,
author = {T.Sanyal and M.Scott Shell},
title = {Transferable coarse-grained models of liquid-liquid equilibrium using local density potentials optimized with the relative entropy},
journal = {J.~Phys.~Chem. B},
year = 2018,
DOI = doi.org/10.1021/acs.jpcb.7b12446}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
WARNING: Using 'neigh_modify every 1 delay 0 check yes' setting during minimization (src/min.cpp:187)
generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 15.25
ghost atom cutoff = 15.25
binsize = 7.625, bins = 4 4 18
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair table, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/3d
bin: standard
(2) pair local/density, perpetual, copy from (1)
attributes: half, newton on
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 8.441 | 8.589 | 8.688 Mbytes
Temp KinEng PotEng TotEng E_bond E_angle E_dihed E_vdwl
300 1233.1611 2374.6749 3607.836 0 0 0 2374.6749
300 1233.1611 1024.8113 2257.9724 0 0 0 1024.8113
Loop time of 0.240559 on 4 procs for 74 steps with 1380 atoms
98.5% CPU use with 4 MPI tasks x 1 OpenMP threads
Minimization stats:
Stopping criterion = energy tolerance
Energy initial, next-to-last, final =
2374.67491482358 1024.89407898645 1024.81130011575
Force two-norm initial, final = 263.77519 20.459697
Force max component initial, final = 22.412654 8.6082349
Final line search alpha, max atom move = 0.027790997 0.23923143
Iterations, force evaluations = 74 118
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.15928 | 0.1873 | 0.22814 | 6.5 | 77.86
Bond | 3.857e-06 | 4.4012e-06 | 5.496e-06 | 0.0 | 0.00
Neigh | 0.00064142 | 0.0028761 | 0.0058864 | 4.2 | 1.20
Comm | 0.0040776 | 0.039595 | 0.074187 | 12.6 | 16.46
Output | 0 | 0 | 0 | 0.0 | 0.00
Modify | 0 | 0 | 0 | 0.0 | 0.00
Other | | 0.01078 | | | 4.48
Nlocal: 345.000 ave 664 max 147 min
Histogram: 2 0 0 0 0 1 0 0 0 1
Nghost: 2850.50 ave 4438 max 1208 min
Histogram: 1 0 0 1 0 0 1 0 0 1
Neighs: 19377.5 ave 37718 max 7456 min
Histogram: 2 0 0 0 0 1 0 0 0 1
Total # of neighbors = 77510
Ave neighs/atom = 56.166667
Ave special neighs/atom = 0.0000000
Neighbor list builds = 3
Dangerous builds = 0
# Set up integration parameters
fix timeintegration all nve
fix thermostat all langevin 3.0000e+02 3.0000e+02 1.0000e+02 81890
# Equilibration (for realistic results, run for 5000000 steps)
reset_timestep 0
run 5000
generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
WARNING: Fix recenter should come after all other integration fixes (src/fix_recenter.cpp:133)
Per MPI rank memory allocation (min/avg/max) = 7.316 | 7.465 | 7.563 Mbytes
Temp KinEng PotEng TotEng E_bond E_angle E_dihed E_vdwl
300 1233.1611 1024.8113 2257.9724 0 0 0 1024.8113
263.61917 1083.6164 1866.745 2950.3614 0 0 0 1866.745
296.0253 1216.823 2122.8463 3339.6692 0 0 0 2122.8463
301.93846 1241.1292 2172.9802 3414.1095 0 0 0 2172.9802
293.9491 1208.2887 2205.4892 3413.7779 0 0 0 2205.4892
286.33795 1177.0027 2204.8908 3381.8935 0 0 0 2204.8908
295.48217 1214.5904 2230.8849 3445.4753 0 0 0 2230.8849
293.88908 1208.0419 2218.7563 3426.7982 0 0 0 2218.7563
295.13798 1213.1756 2277.4515 3490.6271 0 0 0 2277.4515
290.39538 1193.681 2273.4385 3467.1195 0 0 0 2273.4385
297.56782 1223.1635 2268.7182 3491.8817 0 0 0 2268.7182
306.45578 1259.6978 2289.1507 3548.8486 0 0 0 2289.1507
308.54582 1268.289 2284.8514 3553.1404 0 0 0 2284.8514
302.17353 1242.0955 2262.5577 3504.6532 0 0 0 2262.5577
295.30087 1213.8452 2315.8853 3529.7305 0 0 0 2315.8853
308.59197 1268.4787 2291.8314 3560.3101 0 0 0 2291.8314
297.75618 1223.9378 2287.2003 3511.1381 0 0 0 2287.2003
303.43395 1247.2765 2297.7158 3544.9923 0 0 0 2297.7158
307.16233 1262.6021 2255.9769 3518.5791 0 0 0 2255.9769
301.34428 1238.6868 2284.416 3523.1028 0 0 0 2284.416
295.43209 1214.3846 2294.1043 3508.4889 0 0 0 2294.1043
287.86904 1183.2963 2257.0204 3440.3168 0 0 0 2257.0204
297.2661 1221.9233 2251.4194 3473.3428 0 0 0 2251.4194
298.90221 1228.6486 2261.834 3490.4826 0 0 0 2261.834
288.07202 1184.1307 2284.1918 3468.3225 0 0 0 2284.1918
300.41201 1234.8547 2303.9573 3538.812 0 0 0 2303.9573
283.91279 1167.034 2329.7936 3496.8277 0 0 0 2329.7936
297.27507 1221.9602 2337.0516 3559.0118 0 0 0 2337.0516
296.22263 1217.6341 2335.6424 3553.2765 0 0 0 2335.6424
296.13784 1217.2856 2364.7034 3581.989 0 0 0 2364.7034
308.17642 1266.7706 2320.2753 3587.0459 0 0 0 2320.2753
310.26592 1275.3596 2301.9318 3577.2914 0 0 0 2301.9318
292.97391 1204.2801 2289.8116 3494.0917 0 0 0 2289.8116
294.81231 1211.8369 2315.0388 3526.8757 0 0 0 2315.0388
298.66155 1227.6594 2317.2844 3544.9437 0 0 0 2317.2844
302.77939 1244.5859 2301.2063 3545.7922 0 0 0 2301.2063
291.47597 1198.1228 2285.1757 3483.2985 0 0 0 2285.1757
286.19045 1176.3964 2265.2665 3441.6629 0 0 0 2265.2665
295.58144 1214.9984 2272.3165 3487.315 0 0 0 2272.3165
283.86988 1166.8577 2320.6142 3487.4719 0 0 0 2320.6142
300.0576 1233.3979 2330.8962 3564.2941 0 0 0 2330.8962
299.86413 1232.6026 2321.2281 3553.8308 0 0 0 2321.2281
292.79017 1203.5248 2334.2308 3537.7557 0 0 0 2334.2308
291.5027 1198.2327 2335.2119 3533.4446 0 0 0 2335.2119
299.55471 1231.3307 2332.5216 3563.8524 0 0 0 2332.5216
293.29613 1205.6046 2295.3263 3500.9309 0 0 0 2295.3263
303.13151 1246.0333 2310.0548 3556.0881 0 0 0 2310.0548
298.83954 1228.391 2297.3117 3525.7027 0 0 0 2297.3117
297.44775 1222.67 2307.2483 3529.9183 0 0 0 2307.2483
309.59874 1272.6171 2309.2439 3581.861 0 0 0 2309.2439
307.47844 1263.9015 2274.998 3538.8995 0 0 0 2274.998
Loop time of 11.2235 on 4 procs for 5000 steps with 1380 atoms
Performance: 76.982 ns/day, 0.312 hours/ns, 445.495 timesteps/s
98.5% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 7.1444 | 8.5074 | 10.534 | 44.9 | 75.80
Bond | 0.00017048 | 0.00020672 | 0.00030488 | 0.0 | 0.00
Neigh | 0.026174 | 0.12108 | 0.26052 | 28.2 | 1.08
Comm | 0.21788 | 1.8597 | 3.3375 | 81.2 | 16.57
Output | 0.0008989 | 0.0069895 | 0.021647 | 10.2 | 0.06
Modify | 0.19418 | 0.7044 | 2.1378 | 98.6 | 6.28
Other | | 0.02368 | | | 0.21
Nlocal: 345.000 ave 678 max 148 min
Histogram: 2 0 0 0 1 0 0 0 0 1
Nghost: 2854.25 ave 4464 max 1181 min
Histogram: 1 0 0 1 0 0 1 0 0 1
Neighs: 19366.8 ave 38533 max 7481 min
Histogram: 2 0 0 0 0 1 0 0 0 1
Total # of neighbors = 77467
Ave neighs/atom = 56.135507
Ave special neighs/atom = 0.0000000
Neighbor list builds = 121
Dangerous builds = 1
# Turn off recentering during production phase
unfix recentering
# Setup trajectory output
dump myDump all custom 100 benzene_water.lammpstrj.gz id type x y z element
dump_modify myDump element B W
dump_modify myDump sort id
# Production (for realistic results, run for 10000000 steps)
reset_timestep 0
run 1000
generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Per MPI rank memory allocation (min/avg/max) = 8.640 | 8.791 | 8.894 Mbytes
Temp KinEng PotEng TotEng E_bond E_angle E_dihed E_vdwl
307.47844 1263.9015 2274.998 3538.8995 0 0 0 2274.998
309.46142 1272.0526 2274.8499 3546.9026 0 0 0 2274.8499
300.70977 1236.0787 2301.0588 3537.1374 0 0 0 2301.0588
300.53659 1235.3668 2316.1008 3551.4675 0 0 0 2316.1008
300.48582 1235.1581 2296.3009 3531.459 0 0 0 2296.3009
299.2618 1230.1267 2325.7501 3555.8768 0 0 0 2325.7501
303.00905 1245.5299 2321.8238 3567.3537 0 0 0 2321.8238
300.07018 1233.4496 2339.2833 3572.7329 0 0 0 2339.2833
304.20292 1250.4374 2353.1018 3603.5392 0 0 0 2353.1018
304.19487 1250.4043 2334.5087 3584.913 0 0 0 2334.5087
294.24283 1209.4961 2335.0535 3544.5496 0 0 0 2335.0535
Loop time of 2.90512 on 4 procs for 1000 steps with 1380 atoms
Performance: 59.481 ns/day, 0.403 hours/ns, 344.220 timesteps/s
98.4% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 1.8627 | 2.2082 | 2.7289 | 22.6 | 76.01
Bond | 4.042e-05 | 5.3677e-05 | 8.4044e-05 | 0.0 | 0.00
Neigh | 0.0066184 | 0.030172 | 0.064523 | 13.9 | 1.04
Comm | 0.05914 | 0.51145 | 0.86887 | 40.7 | 17.61
Output | 0.0057814 | 0.0073478 | 0.011158 | 2.6 | 0.25
Modify | 0.0085337 | 0.020869 | 0.042248 | 9.4 | 0.72
Other | | 0.127 | | | 4.37
Nlocal: 345.000 ave 682 max 147 min
Histogram: 2 0 0 0 1 0 0 0 0 1
Nghost: 2836.25 ave 4427 max 1175 min
Histogram: 1 0 0 1 0 0 1 0 0 1
Neighs: 19249.8 ave 38683 max 7433 min
Histogram: 2 0 0 0 1 0 0 0 0 1
Total # of neighbors = 76999
Ave neighs/atom = 55.796377
Ave special neighs/atom = 0.0000000
Neighbor list builds = 23
Dangerous builds = 0
Total wall time: 0:00:14

View File

@ -1,6 +1,6 @@
# LAMMPS input file for 50.0% methanol mole fraction solution
# with 2500 methanol molecules in implicit water.
#
#
#
# Author: David Rosenberger, van der Vegt Group, TU Darmstadt
#
@ -9,7 +9,7 @@
# Initialize simulation box
dimension 3
boundary p p p
boundary p p p
units real
atom_style molecular
@ -17,7 +17,7 @@ atom_style molecular
pair_style hybrid/overlay table spline 500 local/density
# Read molecule data and set initial velocities
read_data methanol_implicit_water.data
read_data methanol_implicit_water.data
velocity all create 3.0000e+02 12142 rot yes dist gaussian
# Assign potentials
@ -31,7 +31,7 @@ pair_coeff * * local/density methanol_implicit_water.localdensity.t
fix recentering all recenter 0.0 0.0 0.0 units box
#Thermostat & time integration
timestep 1.0
timestep 1.0
thermo 100
thermo_style custom etotal ke pe temp evdwl
@ -52,15 +52,14 @@ run 2000
#turn off recentering during production run
unfix recentering
reset_timestep 0
#setup trajectory output
dump myDump all custom 100 methanol_implicit_water.lammpstrj.gz id type x y z element
dump_modify myDump element M
dump_modify myDump sort id
#dump myDump all custom 100 methanol_implicit_water.lammpstrj.gz id type x y z element
#dump_modify myDump element M
#dump_modify myDump sort id
#run production (for realistic results, run for 10000000 steps)
reset_timestep 0
thermo 1000
thermo_style custom etotal ke pe temp evdwl
run 10000

View File

@ -1,226 +0,0 @@
LAMMPS (7 Aug 2019)
# LAMMPS input file for 50.0% methanol mole fraction solution
# with 2500 methanol molecules in implicit water.
#
#
# Author: David Rosenberger, van der Vegt Group, TU Darmstadt
#
# Refer: Rosenberger, Sanyal, Shell, van der Vegt, J. Chem. Theory Comput. 15, 2881-2895 (2019)
# Initialize simulation box
dimension 3
boundary p p p
units real
atom_style molecular
# Set potential styles
pair_style hybrid/overlay table spline 500 local/density
# Read molecule data and set initial velocities
read_data methanol_implicit_water.data
orthogonal box = (-31.123 -31.123 -31.123) to (31.123 31.123 31.123)
2 by 2 by 2 MPI processor grid
reading atoms ...
2500 atoms
0 = max # of 1-2 neighbors
0 = max # of 1-3 neighbors
0 = max # of 1-4 neighbors
1 = max # of special neighbors
special bonds CPU = 0.00063014 secs
read_data CPU = 0.00599909 secs
velocity all create 3.0000e+02 12142 rot yes dist gaussian
# Assign potentials
pair_coeff 1 1 table methanol_implicit_water.pair.table PairMM
WARNING: 93 of 500 force values in table are inconsistent with -dE/dr.
Should only be flagged at inflection points (../pair_table.cpp:483)
WARNING: 254 of 500 distance values in table with relative error
over 1e-06 to re-computed values (../pair_table.cpp:492)
pair_coeff * * local/density methanol_implicit_water.localdensity.table
#Recentering during minimization and equilibration
fix recentering all recenter 0.0 0.0 0.0 units box
#Thermostat & time integration
timestep 1.0
thermo 100
thermo_style custom etotal ke pe temp evdwl
#minimization
minimize 1.e-4 0.0 1000 1000
WARNING: Using 'neigh_modify every 1 delay 0 check yes' setting during minimization (../min.cpp:168)
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 17
ghost atom cutoff = 17
binsize = 8.5, bins = 8 8 8
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair table, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/3d/newton
bin: standard
(2) pair local/density, perpetual, copy from (1)
attributes: half, newton on
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 7.411 | 7.411 | 7.412 Mbytes
TotEng KinEng PotEng Temp E_vdwl
1470.3564 2234.7133 -764.35689 300 -764.35689
46.496766 2234.7133 -2188.2165 300 -2188.2165
7.9030246 2234.7133 -2226.8103 300 -2226.8103
Loop time of 0.463996 on 8 procs for 121 steps with 2500 atoms
91.4% CPU use with 8 MPI tasks x no OpenMP threads
Minimization stats:
Stopping criterion = linesearch alpha is zero
Energy initial, next-to-last, final =
-764.356892369 -2227.85589084 -2226.81026984
Force two-norm initial, final = 134.911 3.83896
Force max component initial, final = 14.1117 1.07422
Final line search alpha, max atom move = 5.06747e-10 5.44356e-10
Iterations, force evaluations = 121 154
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.41442 | 0.41976 | 0.42434 | 0.5 | 90.47
Bond | 1.1683e-05 | 2.0713e-05 | 3.5048e-05 | 0.0 | 0.00
Neigh | 0.0084722 | 0.0090862 | 0.010038 | 0.5 | 1.96
Comm | 0.022712 | 0.028157 | 0.034072 | 1.9 | 6.07
Output | 3.1948e-05 | 3.6925e-05 | 6.6996e-05 | 0.0 | 0.01
Modify | 0 | 0 | 0 | 0.0 | 0.00
Other | | 0.006937 | | | 1.50
Nlocal: 312.5 ave 333 max 299 min
Histogram: 2 2 0 0 1 0 2 0 0 1
Nghost: 2546 ave 2580 max 2517 min
Histogram: 1 1 0 3 0 1 0 0 0 2
Neighs: 33215.4 ave 37251 max 29183 min
Histogram: 1 0 0 1 2 2 0 1 0 1
Total # of neighbors = 265723
Ave neighs/atom = 106.289
Ave special neighs/atom = 0
Neighbor list builds = 6
Dangerous builds = 0
#set up integration parameters
fix timeintegration all nve
fix thermostat all langevin 3.0000e+02 3.0000e+02 1.0000e+02 59915
#Equilibration (for realistic results, run for 2000000 steps)
reset_timestep 0
thermo 200
thermo_style custom etotal ke pe temp evdwl
#run equilibration
run 2000
WARNING: Fix recenter should come after all other integration fixes (../fix_recenter.cpp:131)
Per MPI rank memory allocation (min/avg/max) = 6.286 | 6.286 | 6.287 Mbytes
TotEng KinEng PotEng Temp E_vdwl
177.26822 2234.7133 -2057.4451 300 -2057.4451
736.24287 2151.2608 -1415.0179 288.79688 -1415.0179
963.07617 2090.6433 -1127.5671 280.65926 -1127.5671
1148.9049 2173.1327 -1024.2279 291.73309 -1024.2279
1303.6409 2279.8586 -976.21767 306.06055 -976.21767
1355.42 2281.0383 -925.61826 306.21892 -925.61826
1394.5206 2276.2093 -881.68863 305.57064 -881.68863
1346.9764 2215.2973 -868.32091 297.3935 -868.32091
1381.3654 2248.8061 -867.44063 301.89189 -867.44063
1315.8059 2189.3193 -873.51332 293.90606 -873.51332
1314.4456 2209.7431 -895.29752 296.64787 -895.29752
Loop time of 6.38989 on 8 procs for 2000 steps with 2500 atoms
Performance: 27.043 ns/day, 0.887 hours/ns, 312.994 timesteps/s
80.5% CPU use with 8 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 5.2693 | 5.3572 | 5.457 | 2.1 | 83.84
Bond | 0.00028825 | 0.00033835 | 0.00039148 | 0.0 | 0.01
Neigh | 0.0296 | 0.032337 | 0.035071 | 0.9 | 0.51
Comm | 0.64679 | 0.73397 | 0.80847 | 5.2 | 11.49
Output | 0.00033498 | 0.00051582 | 0.0015228 | 0.0 | 0.01
Modify | 0.16395 | 0.18919 | 0.21056 | 3.9 | 2.96
Other | | 0.07636 | | | 1.19
Nlocal: 312.5 ave 337 max 295 min
Histogram: 2 2 0 1 0 0 0 1 1 1
Nghost: 2551.62 ave 2582 max 2525 min
Histogram: 2 1 0 0 1 1 1 0 1 1
Neighs: 33241.8 ave 37659 max 29705 min
Histogram: 2 0 0 2 2 0 0 0 1 1
Total # of neighbors = 265934
Ave neighs/atom = 106.374
Ave special neighs/atom = 0
Neighbor list builds = 21
Dangerous builds = 0
#turn off recentering during production run
unfix recentering
#setup trajectory output
dump myDump all custom 100 methanol_implicit_water.lammpstrj.gz id type x y z element
dump_modify myDump element M
dump_modify myDump sort id
#run production (for realistic results, run for 10000000 steps)
reset_timestep 0
thermo 1000
thermo_style custom etotal ke pe temp evdwl
run 10000
Per MPI rank memory allocation (min/avg/max) = 7.588 | 7.589 | 7.589 Mbytes
TotEng KinEng PotEng Temp E_vdwl
1442.5428 2209.7431 -767.20027 296.64787 -767.20027
1391.8624 2262.6889 -870.82656 303.7556 -870.82656
1375.914 2244.6176 -868.7036 301.3296 -868.7036
1345.9064 2227.2324 -881.32599 298.99573 -881.32599
1379.2334 2278.1156 -898.88222 305.82657 -898.88222
1389.7928 2255.8062 -866.01341 302.83163 -866.01341
1380.4549 2258.2108 -877.75582 303.15443 -877.75582
1380.8489 2256.9432 -876.09428 302.98426 -876.09428
1326.5151 2225.7408 -899.22577 298.79549 -899.22577
1376.6025 2253.0128 -876.41028 302.45662 -876.41028
1331.0008 2218.1033 -887.10258 297.77019 -887.10258
Loop time of 25.4591 on 8 procs for 10000 steps with 2500 atoms
Performance: 33.937 ns/day, 0.707 hours/ns, 392.787 timesteps/s
89.3% CPU use with 8 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 21.635 | 21.916 | 22.237 | 3.9 | 86.08
Bond | 0.0011308 | 0.0013149 | 0.0016932 | 0.5 | 0.01
Neigh | 0.14593 | 0.15675 | 0.16667 | 1.9 | 0.62
Comm | 1.3789 | 1.7502 | 1.9558 | 13.7 | 6.87
Output | 0.34664 | 0.82927 | 1.2013 | 32.8 | 3.26
Modify | 0.24904 | 0.25842 | 0.26907 | 1.2 | 1.02
Other | | 0.5475 | | | 2.15
Nlocal: 312.5 ave 327 max 298 min
Histogram: 2 0 0 1 1 0 1 1 1 1
Nghost: 2575 ave 2601 max 2559 min
Histogram: 2 0 3 1 0 0 0 0 1 1
Neighs: 33223.2 ave 35920 max 30303 min
Histogram: 1 1 1 1 0 1 0 0 0 3
Total # of neighbors = 265786
Ave neighs/atom = 106.314
Ave special neighs/atom = 0
Neighbor list builds = 103
Dangerous builds = 0
Total wall time: 0:00:32

View File

@ -0,0 +1,259 @@
LAMMPS (27 Oct 2021)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
using 1 OpenMP thread(s) per MPI task
# LAMMPS input file for 50.0% methanol mole fraction solution
# with 2500 methanol molecules in implicit water.
#
#
# Author: David Rosenberger, van der Vegt Group, TU Darmstadt
#
# Refer: Rosenberger, Sanyal, Shell, van der Vegt, J. Chem. Theory Comput. 15, 2881-2895 (2019)
# Initialize simulation box
dimension 3
boundary p p p
units real
atom_style molecular
# Set potential styles
pair_style hybrid/overlay table spline 500 local/density
# Read molecule data and set initial velocities
read_data methanol_implicit_water.data
Reading data file ...
orthogonal box = (-31.123000 -31.123000 -31.123000) to (31.123000 31.123000 31.123000)
1 by 1 by 1 MPI processor grid
reading atoms ...
2500 atoms
Finding 1-2 1-3 1-4 neighbors ...
special bond factors lj: 0 0 0
special bond factors coul: 0 0 0
0 = max # of 1-2 neighbors
0 = max # of 1-3 neighbors
0 = max # of 1-4 neighbors
1 = max # of special neighbors
special bonds CPU = 0.001 seconds
read_data CPU = 0.016 seconds
velocity all create 3.0000e+02 12142 rot yes dist gaussian
# Assign potentials
pair_coeff 1 1 table methanol_implicit_water.pair.table PairMM
WARNING: 93 of 500 force values in table PairMM are inconsistent with -dE/dr.
WARNING: Should only be flagged at inflection points (src/pair_table.cpp:465)
WARNING: 254 of 500 distance values in table 1e-06 with relative error
WARNING: over PairMM to re-computed values (src/pair_table.cpp:473)
pair_coeff * * local/density methanol_implicit_water.localdensity.table
#Recentering during minimization and equilibration
fix recentering all recenter 0.0 0.0 0.0 units box
#Thermostat & time integration
timestep 1.0
thermo 100
thermo_style custom etotal ke pe temp evdwl
#minimization
minimize 1.e-4 0.0 1000 1000
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- pair_style local/density command:
@Article{Sanyal16,
author = {T.Sanyal and M.Scott Shell},
title = {Coarse-grained models using local-density potentials optimized with the relative entropy: Application to implicit solvation},
journal = {J.~Chem.~Phys.},
year = 2016,
DOI = doi.org/10.1063/1.4958629}
@Article{Sanyal18,
author = {T.Sanyal and M.Scott Shell},
title = {Transferable coarse-grained models of liquid-liquid equilibrium using local density potentials optimized with the relative entropy},
journal = {J.~Phys.~Chem. B},
year = 2018,
DOI = doi.org/10.1021/acs.jpcb.7b12446}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
WARNING: Using 'neigh_modify every 1 delay 0 check yes' setting during minimization (src/min.cpp:187)
generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 17
ghost atom cutoff = 17
binsize = 8.5, bins = 8 8 8
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair table, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/3d
bin: standard
(2) pair local/density, perpetual, copy from (1)
attributes: half, newton on
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 9.535 | 9.535 | 9.535 Mbytes
TotEng KinEng PotEng Temp E_vdwl
1283.8556 2234.7133 -950.85771 300 -950.85771
-10.187232 2234.7133 -2244.9005 300 -2244.9005
-124.79406 2234.7133 -2359.5074 300 -2359.5074
-126.7619 2234.7133 -2361.4752 300 -2361.4752
Loop time of 3.74581 on 1 procs for 205 steps with 2500 atoms
99.5% CPU use with 1 MPI tasks x 1 OpenMP threads
Minimization stats:
Stopping criterion = energy tolerance
Energy initial, next-to-last, final =
-950.857712502514 -2361.24417962983 -2361.47519428972
Force two-norm initial, final = 135.25170 2.8038329
Force max component initial, final = 14.083102 1.1154133
Final line search alpha, max atom move = 0.16981022 0.18940857
Iterations, force evaluations = 205 223
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 3.5678 | 3.5678 | 3.5678 | 0.0 | 95.25
Bond | 7.5831e-05 | 7.5831e-05 | 7.5831e-05 | 0.0 | 0.00
Neigh | 0.12962 | 0.12962 | 0.12962 | 0.0 | 3.46
Comm | 0.019204 | 0.019204 | 0.019204 | 0.0 | 0.51
Output | 0.00023948 | 0.00023948 | 0.00023948 | 0.0 | 0.01
Modify | 0 | 0 | 0 | 0.0 | 0.00
Other | | 0.02886 | | | 0.77
Nlocal: 2500.00 ave 2500 max 2500 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 6729.00 ave 6729 max 6729 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 265637.0 ave 265637 max 265637 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 265637
Ave neighs/atom = 106.25480
Ave special neighs/atom = 0.0000000
Neighbor list builds = 11
Dangerous builds = 0
#set up integration parameters
fix timeintegration all nve
fix thermostat all langevin 3.0000e+02 3.0000e+02 1.0000e+02 59915
#Equilibration (for realistic results, run for 2000000 steps)
reset_timestep 0
thermo 200
thermo_style custom etotal ke pe temp evdwl
#run equilibration
run 2000
generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
WARNING: Fix recenter should come after all other integration fixes (src/fix_recenter.cpp:133)
Per MPI rank memory allocation (min/avg/max) = 8.410 | 8.410 | 8.410 Mbytes
TotEng KinEng PotEng Temp E_vdwl
-126.7619 2234.7133 -2361.4752 300 -2361.4752
517.05047 2015.8636 -1498.8131 270.62043 -1498.8131
931.78263 2135.4332 -1203.6506 286.6721 -1203.6506
1162.6209 2242.1662 -1079.5453 301.00051 -1079.5453
1164.2129 2211.6204 -1047.4075 296.89989 -1047.4075
1258.0085 2286.5942 -1028.5857 306.96477 -1028.5857
1231.1937 2200.814 -969.62032 295.44917 -969.62032
1251.2144 2245.0533 -993.83885 301.3881 -993.83885
1237.2495 2239.8802 -1002.6307 300.69363 -1002.6307
1232.3342 2224.3415 -992.00722 298.60763 -992.00722
1235.3228 2197.191 -961.86817 294.9628 -961.86817
Loop time of 23.6478 on 1 procs for 2000 steps with 2500 atoms
Performance: 7.307 ns/day, 3.284 hours/ns, 84.575 timesteps/s
99.5% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 22.797 | 22.797 | 22.797 | 0.0 | 96.40
Bond | 0.00070412 | 0.00070412 | 0.00070412 | 0.0 | 0.00
Neigh | 0.2249 | 0.2249 | 0.2249 | 0.0 | 0.95
Comm | 0.12259 | 0.12259 | 0.12259 | 0.0 | 0.52
Output | 0.00088925 | 0.00088925 | 0.00088925 | 0.0 | 0.00
Modify | 0.46447 | 0.46447 | 0.46447 | 0.0 | 1.96
Other | | 0.03711 | | | 0.16
Nlocal: 2500.00 ave 2500 max 2500 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 6752.00 ave 6752 max 6752 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 265940.0 ave 265940 max 265940 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 265940
Ave neighs/atom = 106.37600
Ave special neighs/atom = 0.0000000
Neighbor list builds = 20
Dangerous builds = 0
#turn off recentering during production run
unfix recentering
#setup trajectory output
dump myDump all custom 100 methanol_implicit_water.lammpstrj.gz id type x y z element
dump_modify myDump element M
dump_modify myDump sort id
#run production (for realistic results, run for 10000000 steps)
reset_timestep 0
thermo 1000
thermo_style custom etotal ke pe temp evdwl
run 10000
generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
Per MPI rank memory allocation (min/avg/max) = 9.918 | 9.918 | 9.918 Mbytes
TotEng KinEng PotEng Temp E_vdwl
1235.3228 2197.191 -961.86817 294.9628 -961.86817
1289.8463 2236.1425 -946.29622 300.19186 -946.29622
1348.0825 2305.0295 -956.94703 309.43963 -956.94703
1279.5478 2241.1582 -961.61041 300.86521 -961.61041
1231.8597 2201.9591 -970.09949 295.60291 -970.09949
1277.3424 2221.3696 -944.02725 298.20867 -944.02725
1296.0116 2222.0998 -926.08818 298.3067 -926.08818
1266.2849 2206.3727 -940.08782 296.1954 -940.08782
1313.2808 2260.5077 -947.22683 303.46278 -947.22683
1309.3076 2234.3895 -925.08198 299.95654 -925.08198
1275.9792 2221.3037 -945.32449 298.19982 -945.32449
Loop time of 67.3224 on 1 procs for 10000 steps with 2500 atoms
Performance: 12.834 ns/day, 1.870 hours/ns, 148.539 timesteps/s
99.4% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 64.476 | 64.476 | 64.476 | 0.0 | 95.77
Bond | 0.0014504 | 0.0014504 | 0.0014504 | 0.0 | 0.00
Neigh | 0.71333 | 0.71333 | 0.71333 | 0.0 | 1.06
Comm | 0.32846 | 0.32846 | 0.32846 | 0.0 | 0.49
Output | 0.46997 | 0.46997 | 0.46997 | 0.0 | 0.70
Modify | 1.2336 | 1.2336 | 1.2336 | 0.0 | 1.83
Other | | 0.09996 | | | 0.15
Nlocal: 2500.00 ave 2500 max 2500 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 6662.00 ave 6662 max 6662 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 265774.0 ave 265774 max 265774 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 265774
Ave neighs/atom = 106.30960
Ave special neighs/atom = 0.0000000
Neighbor list builds = 104
Dangerous builds = 0
Total wall time: 0:01:34

View File

@ -0,0 +1,259 @@
LAMMPS (27 Oct 2021)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
using 1 OpenMP thread(s) per MPI task
# LAMMPS input file for 50.0% methanol mole fraction solution
# with 2500 methanol molecules in implicit water.
#
#
# Author: David Rosenberger, van der Vegt Group, TU Darmstadt
#
# Refer: Rosenberger, Sanyal, Shell, van der Vegt, J. Chem. Theory Comput. 15, 2881-2895 (2019)
# Initialize simulation box
dimension 3
boundary p p p
units real
atom_style molecular
# Set potential styles
pair_style hybrid/overlay table spline 500 local/density
# Read molecule data and set initial velocities
read_data methanol_implicit_water.data
Reading data file ...
orthogonal box = (-31.123000 -31.123000 -31.123000) to (31.123000 31.123000 31.123000)
1 by 2 by 2 MPI processor grid
reading atoms ...
2500 atoms
Finding 1-2 1-3 1-4 neighbors ...
special bond factors lj: 0 0 0
special bond factors coul: 0 0 0
0 = max # of 1-2 neighbors
0 = max # of 1-3 neighbors
0 = max # of 1-4 neighbors
1 = max # of special neighbors
special bonds CPU = 0.000 seconds
read_data CPU = 0.005 seconds
velocity all create 3.0000e+02 12142 rot yes dist gaussian
# Assign potentials
pair_coeff 1 1 table methanol_implicit_water.pair.table PairMM
WARNING: 93 of 500 force values in table PairMM are inconsistent with -dE/dr.
WARNING: Should only be flagged at inflection points (src/pair_table.cpp:465)
WARNING: 254 of 500 distance values in table 1e-06 with relative error
WARNING: over PairMM to re-computed values (src/pair_table.cpp:473)
pair_coeff * * local/density methanol_implicit_water.localdensity.table
#Recentering during minimization and equilibration
fix recentering all recenter 0.0 0.0 0.0 units box
#Thermostat & time integration
timestep 1.0
thermo 100
thermo_style custom etotal ke pe temp evdwl
#minimization
minimize 1.e-4 0.0 1000 1000
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- pair_style local/density command:
@Article{Sanyal16,
author = {T.Sanyal and M.Scott Shell},
title = {Coarse-grained models using local-density potentials optimized with the relative entropy: Application to implicit solvation},
journal = {J.~Chem.~Phys.},
year = 2016,
DOI = doi.org/10.1063/1.4958629}
@Article{Sanyal18,
author = {T.Sanyal and M.Scott Shell},
title = {Transferable coarse-grained models of liquid-liquid equilibrium using local density potentials optimized with the relative entropy},
journal = {J.~Phys.~Chem. B},
year = 2018,
DOI = doi.org/10.1021/acs.jpcb.7b12446}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
WARNING: Using 'neigh_modify every 1 delay 0 check yes' setting during minimization (src/min.cpp:187)
generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 17
ghost atom cutoff = 17
binsize = 8.5, bins = 8 8 8
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair table, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/3d
bin: standard
(2) pair local/density, perpetual, copy from (1)
attributes: half, newton on
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 7.855 | 7.855 | 7.855 Mbytes
TotEng KinEng PotEng Temp E_vdwl
1283.8556 2234.7133 -950.85771 300 -950.85771
-10.187232 2234.7133 -2244.9005 300 -2244.9005
-124.3661 2234.7133 -2359.0794 300 -2359.0794
-146.7158 2234.7133 -2381.4291 300 -2381.4291
Loop time of 0.528503 on 4 procs for 244 steps with 2500 atoms
99.7% CPU use with 4 MPI tasks x 1 OpenMP threads
Minimization stats:
Stopping criterion = energy tolerance
Energy initial, next-to-last, final =
-950.857712502527 -2381.2294195605 -2381.42909821383
Force two-norm initial, final = 135.25170 2.3117934
Force max component initial, final = 14.083102 0.60833889
Final line search alpha, max atom move = 0.18347073 0.11161238
Iterations, force evaluations = 244 278
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.48518 | 0.48843 | 0.49223 | 0.4 | 92.42
Bond | 1.0084e-05 | 1.0861e-05 | 1.1483e-05 | 0.0 | 0.00
Neigh | 0.018199 | 0.019153 | 0.020036 | 0.5 | 3.62
Comm | 0.010229 | 0.014832 | 0.018994 | 2.6 | 2.81
Output | 3.7985e-05 | 4.2069e-05 | 5.3874e-05 | 0.0 | 0.01
Modify | 0 | 0 | 0 | 0.0 | 0.00
Other | | 0.006032 | | | 1.14
Nlocal: 625.000 ave 638 max 618 min
Histogram: 2 0 0 0 1 0 0 0 0 1
Nghost: 3613.75 ave 3640 max 3580 min
Histogram: 1 0 0 0 1 0 0 0 1 1
Neighs: 66411.2 ave 70713 max 62416 min
Histogram: 1 0 1 0 0 0 1 0 0 1
Total # of neighbors = 265645
Ave neighs/atom = 106.25800
Ave special neighs/atom = 0.0000000
Neighbor list builds = 13
Dangerous builds = 0
#set up integration parameters
fix timeintegration all nve
fix thermostat all langevin 3.0000e+02 3.0000e+02 1.0000e+02 59915
#Equilibration (for realistic results, run for 2000000 steps)
reset_timestep 0
thermo 200
thermo_style custom etotal ke pe temp evdwl
#run equilibration
run 2000
generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
WARNING: Fix recenter should come after all other integration fixes (src/fix_recenter.cpp:133)
Per MPI rank memory allocation (min/avg/max) = 6.730 | 6.730 | 6.731 Mbytes
TotEng KinEng PotEng Temp E_vdwl
-146.7158 2234.7133 -2381.4291 300 -2381.4291
540.68168 2041.44 -1500.7584 274.05395 -1500.7584
945.4949 2163.7509 -1218.256 290.47363 -1218.256
1118.7729 2195.7579 -1076.985 294.77042 -1076.985
1215.0058 2233.2445 -1018.2387 299.80282 -1018.2387
1251.8045 2240.8439 -989.03944 300.823 -989.03944
1206.649 2149.5807 -942.93169 288.57134 -942.93169
1290.6111 2248.3623 -957.75117 301.83231 -957.75117
1312.8944 2219.147 -906.25264 297.9103 -906.25264
1260.002 2211.4176 -951.41561 296.87266 -951.41561
1335.0956 2270.1367 -935.04108 304.75543 -935.04108
Loop time of 3.56721 on 4 procs for 2000 steps with 2500 atoms
Performance: 48.441 ns/day, 0.495 hours/ns, 560.663 timesteps/s
99.8% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 3.3122 | 3.3399 | 3.3633 | 1.0 | 93.63
Bond | 7.5941e-05 | 8.062e-05 | 8.7627e-05 | 0.0 | 0.00
Neigh | 0.03524 | 0.036666 | 0.037864 | 0.6 | 1.03
Comm | 0.080116 | 0.10444 | 0.13373 | 6.1 | 2.93
Output | 0.00019977 | 0.00022502 | 0.00029007 | 0.0 | 0.01
Modify | 0.077781 | 0.078206 | 0.078752 | 0.1 | 2.19
Other | | 0.007641 | | | 0.21
Nlocal: 625.000 ave 637 max 616 min
Histogram: 1 0 1 0 1 0 0 0 0 1
Nghost: 3597.25 ave 3610 max 3586 min
Histogram: 1 0 1 0 0 0 1 0 0 1
Neighs: 66468.2 ave 69230 max 62721 min
Histogram: 1 0 0 1 0 0 0 0 0 2
Total # of neighbors = 265873
Ave neighs/atom = 106.34920
Ave special neighs/atom = 0.0000000
Neighbor list builds = 20
Dangerous builds = 0
#turn off recentering during production run
unfix recentering
#setup trajectory output
dump myDump all custom 100 methanol_implicit_water.lammpstrj.gz id type x y z element
dump_modify myDump element M
dump_modify myDump sort id
#run production (for realistic results, run for 10000000 steps)
reset_timestep 0
thermo 1000
thermo_style custom etotal ke pe temp evdwl
run 10000
generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
Per MPI rank memory allocation (min/avg/max) = 8.071 | 8.071 | 8.071 Mbytes
TotEng KinEng PotEng Temp E_vdwl
1335.0956 2270.1367 -935.04108 304.75543 -935.04108
1266.2305 2227.2123 -960.98186 298.99303 -960.98186
1304.2289 2238.1343 -933.90544 300.45925 -933.90544
1311.3201 2232.0862 -920.7661 299.64733 -920.7661
1289.9028 2241.3533 -951.45049 300.89139 -951.45049
1314.2234 2244.8514 -930.62797 301.361 -930.62797
1282.2744 2240.6716 -958.39719 300.79987 -958.39719
1239.302 2181.5711 -942.2691 292.86591 -942.2691
1327.0954 2242.6441 -915.54875 301.06468 -915.54875
1334.9799 2239.6841 -904.70423 300.66731 -904.70423
1320.6105 2263.4912 -942.88066 303.8633 -942.88066
Loop time of 23.3399 on 4 procs for 10000 steps with 2500 atoms
Performance: 37.018 ns/day, 0.648 hours/ns, 428.451 timesteps/s
99.5% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 21.343 | 21.606 | 21.766 | 3.7 | 92.57
Bond | 0.00045963 | 0.0004817 | 0.0005083 | 0.0 | 0.00
Neigh | 0.20708 | 0.22081 | 0.22733 | 1.7 | 0.95
Comm | 0.63014 | 0.80326 | 1.0801 | 19.8 | 3.44
Output | 0.11791 | 0.14443 | 0.22211 | 11.8 | 0.62
Modify | 0.37291 | 0.389 | 0.41719 | 2.7 | 1.67
Other | | 0.1761 | | | 0.75
Nlocal: 625.000 ave 636 max 613 min
Histogram: 1 0 0 0 0 2 0 0 0 1
Nghost: 3597.00 ave 3613 max 3580 min
Histogram: 1 0 0 1 0 0 0 1 0 1
Neighs: 66408.5 ave 69186 max 61728 min
Histogram: 1 0 0 0 0 0 1 0 1 1
Total # of neighbors = 265634
Ave neighs/atom = 106.25360
Ave special neighs/atom = 0.0000000
Neighbor list builds = 102
Dangerous builds = 0
Total wall time: 0:00:27

View File

@ -1,4 +1,4 @@
#LOCAL DENSITY POTENTIALS
#LOCAL DENSITY POTENTIALS UNITS: real
1 500

View File

@ -1,4 +1,4 @@
# UNITS: real
PairMM
N 500 R 2.00000e-02 1.50000e+01

View File

@ -0,0 +1,30 @@
# Monolayer MoS2: short constant-energy MD run using the sw/mod pair style
units metal
# periodic in x and y, non-periodic (fixed) in z
boundary p p f
# keep a single processor layer along z
processors * * 1
atom_style atomic
read_data single_layer_MoS2.data
# Assign the sulfur mass to every atom type first; type 1 is overridden on the next line.
mass * 32.065 # mass of sulfur atom, unit: a.u. = 1.66x10^(-27) kg
mass 1 95.94 # mass of molybdenum atom, unit: a.u. = 1.66x10^(-27) kg
########################## Define potentials ################################
# maxdelcs takes the two values 0.25 and 0.35 (see the pair_style sw/mod documentation)
pair_style sw/mod maxdelcs 0.25 0.35
# element mapping: atom type 1 -> Mo, types 2 and 3 -> S
pair_coeff * * tmd.sw.mod Mo S S
#########################################################################
### Simulation settings ####
# 0.001 time units (ps in metal units, i.e. 1 fs) -- TODO confirm against the units documentation
timestep 0.001
# initial velocities for temperature 300.0 with random seed 12345
velocity all create 300.0 12345
############################
# Output
# print thermodynamic data every 500 steps
thermo 500
thermo_style custom step etotal pe ke temp
# only warn (do not abort) if atoms are lost
thermo_modify lost warn
###### Run molecular dynamics ######
# NOTE: the fix ID is "thermostat", but the style is nve (plain time integration, no thermostat)
fix thermostat all nve
run 5000

View File

@ -0,0 +1,92 @@
LAMMPS (27 Oct 2021)
# monolayer MoS2
units metal
boundary p p f
processors * * 1
atom_style atomic
read_data single_layer_MoS2.data
Reading data file ...
triclinic box = (0.0000000 0.0000000 -100.00000) to (51.152320 44.299209 100.00000) with tilt (25.576160 0.0000000 0.0000000)
1 by 1 by 1 MPI processor grid
reading atoms ...
768 atoms
read_data CPU = 0.043 seconds
mass * 32.065 # mass of sulphur atom , uint: a.u.=1.66X10^(-27)kg
mass 1 95.94 # mass of molebdenum atom , uint: a.u.=1.66X10^(-27)kg
########################## Define potentials ################################
pair_style sw/mod maxdelcs 0.25 0.35
pair_coeff * * tmd.sw.mod Mo S S
Reading sw potential file tmd.sw.mod with DATE: 2018-03-26
#########################################################################
### Simulation settings ####
timestep 0.001
velocity all create 300.0 12345
############################
# Output
thermo 500
thermo_style custom step etotal pe ke temp
thermo_modify lost warn
###### Run molecular dynamics ######
fix thermostat all nve
run 5000
Neighbor list info ...
update every 1 steps, delay 10 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 5.158796
ghost atom cutoff = 5.158796
binsize = 2.579398, bins = 30 18 78
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair sw/mod, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 3.466 | 3.466 | 3.466 Mbytes
Step TotEng PotEng KinEng Temp
0 -899.28605 -929.02881 29.742759 300
500 -899.28626 -922.45519 23.168929 233.69313
1000 -899.29247 -925.86547 26.573002 268.02828
1500 -899.27957 -916.95478 17.675214 178.28084
2000 -899.28171 -918.38728 19.105573 192.70814
2500 -899.28732 -922.50423 23.21691 234.17709
3000 -899.28195 -918.74112 19.459174 196.27473
3500 -899.27944 -918.03105 18.751604 189.13784
4000 -899.28397 -920.50737 21.223397 214.06955
4500 -899.28386 -919.79154 20.507685 206.85053
5000 -899.28077 -918.78947 19.508698 196.77425
Loop time of 5.84317 on 1 procs for 5000 steps with 768 atoms
Performance: 73.932 ns/day, 0.325 hours/ns, 855.700 timesteps/s
99.8% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 5.6796 | 5.6796 | 5.6796 | 0.0 | 97.20
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.026354 | 0.026354 | 0.026354 | 0.0 | 0.45
Output | 0.0014959 | 0.0014959 | 0.0014959 | 0.0 | 0.03
Modify | 0.090437 | 0.090437 | 0.090437 | 0.0 | 1.55
Other | | 0.04524 | | | 0.77
Nlocal: 768.000 ave 768 max 768 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 354.000 ave 354 max 354 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 0.00000 ave 0 max 0 min
Histogram: 1 0 0 0 0 0 0 0 0 0
FullNghs: 20480.0 ave 20480 max 20480 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 20480
Ave neighs/atom = 26.666667
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:06

View File

@ -0,0 +1,781 @@
Single layer MoS2
768 atoms
3 atom types
0.0000000000000000 51.1523200000000177 xlo xhi
0.0000000000000000 44.2992085825108320 ylo yhi
-100.0000000000000000 100.0000000000000000 zlo zhi
25.5761600000000088 0.0000000000000000 0.0000000000000000 xy xz yz
Atoms
1 2 0.000000000000000 0.000000000000000 -1.596930000000000
2 3 0.000000000000000 0.000000000000000 1.596930000000000
3 1 0.000000000000000 1.845800357604618 0.000000000000000
4 2 1.598510000000001 2.768700536406927 -1.596930000000000
5 3 1.598510000000001 2.768700536406927 1.596930000000000
6 1 1.598510000000001 4.614500894011545 0.000000000000000
7 2 3.197020000000001 5.537401072813854 -1.596930000000000
8 3 3.197020000000001 5.537401072813854 1.596930000000000
9 1 3.197020000000001 7.383201430418472 0.000000000000000
10 2 4.795530000000002 8.306101609220781 -1.596930000000000
11 3 4.795530000000002 8.306101609220781 1.596930000000000
12 1 4.795530000000002 10.151901966825399 0.000000000000000
13 2 6.394040000000002 11.074802145627708 -1.596930000000000
14 3 6.394040000000002 11.074802145627708 1.596930000000000
15 1 6.394040000000002 12.920602503232326 0.000000000000000
16 2 7.992550000000003 13.843502682034635 -1.596930000000000
17 3 7.992550000000003 13.843502682034635 1.596930000000000
18 1 7.992550000000003 15.689303039639253 0.000000000000000
19 2 9.591060000000003 16.612203218441562 -1.596930000000000
20 3 9.591060000000003 16.612203218441562 1.596930000000000
21 1 9.591060000000003 18.458003576046180 0.000000000000000
22 2 11.189570000000004 19.380903754848489 -1.596930000000000
23 3 11.189570000000004 19.380903754848489 1.596930000000000
24 1 11.189570000000004 21.226704112453107 0.000000000000000
25 2 12.788080000000004 22.149604291255416 -1.596930000000000
26 3 12.788080000000004 22.149604291255416 1.596930000000000
27 1 12.788080000000004 23.995404648860034 0.000000000000000
28 2 14.386590000000005 24.918304827662343 -1.596930000000000
29 3 14.386590000000005 24.918304827662343 1.596930000000000
30 1 14.386590000000005 26.764105185266961 0.000000000000000
31 2 15.985100000000006 27.687005364069270 -1.596930000000000
32 3 15.985100000000006 27.687005364069270 1.596930000000000
33 1 15.985100000000006 29.532805721673888 0.000000000000000
34 2 17.583610000000006 30.455705900476197 -1.596930000000000
35 3 17.583610000000006 30.455705900476197 1.596930000000000
36 1 17.583610000000006 32.301506258080815 0.000000000000000
37 2 19.182120000000007 33.224406436883124 -1.596930000000000
38 3 19.182120000000007 33.224406436883124 1.596930000000000
39 1 19.182120000000007 35.070206794487742 0.000000000000000
40 2 20.780630000000007 35.993106973290051 -1.596930000000000
41 3 20.780630000000007 35.993106973290051 1.596930000000000
42 1 20.780630000000007 37.838907330894669 0.000000000000000
43 2 22.379140000000008 38.761807509696978 -1.596930000000000
44 3 22.379140000000008 38.761807509696978 1.596930000000000
45 1 22.379140000000008 40.607607867301596 0.000000000000000
46 2 23.977650000000008 41.530508046103905 -1.596930000000000
47 3 23.977650000000008 41.530508046103905 1.596930000000000
48 1 23.977650000000008 43.376308403708523 0.000000000000000
49 2 3.197020000000001 0.000000000000000 -1.596930000000000
50 3 3.197020000000001 0.000000000000000 1.596930000000000
51 1 3.197020000000001 1.845800357604618 0.000000000000000
52 2 4.795530000000002 2.768700536406927 -1.596930000000000
53 3 4.795530000000002 2.768700536406927 1.596930000000000
54 1 4.795530000000002 4.614500894011545 0.000000000000000
55 2 6.394040000000002 5.537401072813854 -1.596930000000000
56 3 6.394040000000002 5.537401072813854 1.596930000000000
57 1 6.394040000000002 7.383201430418472 0.000000000000000
58 2 7.992550000000003 8.306101609220781 -1.596930000000000
59 3 7.992550000000003 8.306101609220781 1.596930000000000
60 1 7.992550000000003 10.151901966825399 0.000000000000000
61 2 9.591060000000003 11.074802145627708 -1.596930000000000
62 3 9.591060000000003 11.074802145627708 1.596930000000000
63 1 9.591060000000003 12.920602503232326 0.000000000000000
64 2 11.189570000000004 13.843502682034635 -1.596930000000000
65 3 11.189570000000004 13.843502682034635 1.596930000000000
66 1 11.189570000000004 15.689303039639253 0.000000000000000
67 2 12.788080000000004 16.612203218441562 -1.596930000000000
68 3 12.788080000000004 16.612203218441562 1.596930000000000
69 1 12.788080000000004 18.458003576046180 0.000000000000000
70 2 14.386590000000005 19.380903754848489 -1.596930000000000
71 3 14.386590000000005 19.380903754848489 1.596930000000000
72 1 14.386590000000005 21.226704112453107 0.000000000000000
73 2 15.985100000000006 22.149604291255416 -1.596930000000000
74 3 15.985100000000006 22.149604291255416 1.596930000000000
75 1 15.985100000000006 23.995404648860034 0.000000000000000
76 2 17.583610000000006 24.918304827662343 -1.596930000000000
77 3 17.583610000000006 24.918304827662343 1.596930000000000
78 1 17.583610000000006 26.764105185266961 0.000000000000000
79 2 19.182120000000007 27.687005364069270 -1.596930000000000
80 3 19.182120000000007 27.687005364069270 1.596930000000000
81 1 19.182120000000007 29.532805721673888 0.000000000000000
82 2 20.780630000000007 30.455705900476197 -1.596930000000000
83 3 20.780630000000007 30.455705900476197 1.596930000000000
84 1 20.780630000000007 32.301506258080815 0.000000000000000
85 2 22.379140000000008 33.224406436883124 -1.596930000000000
86 3 22.379140000000008 33.224406436883124 1.596930000000000
87 1 22.379140000000008 35.070206794487742 0.000000000000000
88 2 23.977650000000008 35.993106973290051 -1.596930000000000
89 3 23.977650000000008 35.993106973290051 1.596930000000000
90 1 23.977650000000008 37.838907330894669 0.000000000000000
91 2 25.576160000000009 38.761807509696978 -1.596930000000000
92 3 25.576160000000009 38.761807509696978 1.596930000000000
93 1 25.576160000000009 40.607607867301596 0.000000000000000
94 2 27.174670000000009 41.530508046103905 -1.596930000000000
95 3 27.174670000000009 41.530508046103905 1.596930000000000
96 1 27.174670000000009 43.376308403708523 0.000000000000000
97 2 6.394040000000002 0.000000000000000 -1.596930000000000
98 3 6.394040000000002 0.000000000000000 1.596930000000000
99 1 6.394040000000002 1.845800357604618 0.000000000000000
100 2 7.992550000000003 2.768700536406927 -1.596930000000000
101 3 7.992550000000003 2.768700536406927 1.596930000000000
102 1 7.992550000000003 4.614500894011545 0.000000000000000
103 2 9.591060000000003 5.537401072813854 -1.596930000000000
104 3 9.591060000000003 5.537401072813854 1.596930000000000
105 1 9.591060000000003 7.383201430418472 0.000000000000000
106 2 11.189570000000004 8.306101609220781 -1.596930000000000
107 3 11.189570000000004 8.306101609220781 1.596930000000000
108 1 11.189570000000004 10.151901966825399 0.000000000000000
109 2 12.788080000000004 11.074802145627708 -1.596930000000000
110 3 12.788080000000004 11.074802145627708 1.596930000000000
111 1 12.788080000000004 12.920602503232326 0.000000000000000
112 2 14.386590000000005 13.843502682034635 -1.596930000000000
113 3 14.386590000000005 13.843502682034635 1.596930000000000
114 1 14.386590000000005 15.689303039639253 0.000000000000000
115 2 15.985100000000006 16.612203218441562 -1.596930000000000
116 3 15.985100000000006 16.612203218441562 1.596930000000000
117 1 15.985100000000006 18.458003576046180 0.000000000000000
118 2 17.583610000000006 19.380903754848489 -1.596930000000000
119 3 17.583610000000006 19.380903754848489 1.596930000000000
120 1 17.583610000000006 21.226704112453107 0.000000000000000
121 2 19.182120000000007 22.149604291255416 -1.596930000000000
122 3 19.182120000000007 22.149604291255416 1.596930000000000
123 1 19.182120000000007 23.995404648860034 0.000000000000000
124 2 20.780630000000007 24.918304827662343 -1.596930000000000
125 3 20.780630000000007 24.918304827662343 1.596930000000000
126 1 20.780630000000007 26.764105185266961 0.000000000000000
127 2 22.379140000000008 27.687005364069270 -1.596930000000000
128 3 22.379140000000008 27.687005364069270 1.596930000000000
129 1 22.379140000000008 29.532805721673888 0.000000000000000
130 2 23.977650000000008 30.455705900476197 -1.596930000000000
131 3 23.977650000000008 30.455705900476197 1.596930000000000
132 1 23.977650000000008 32.301506258080815 0.000000000000000
133 2 25.576160000000009 33.224406436883124 -1.596930000000000
134 3 25.576160000000009 33.224406436883124 1.596930000000000
135 1 25.576160000000009 35.070206794487742 0.000000000000000
136 2 27.174670000000009 35.993106973290051 -1.596930000000000
137 3 27.174670000000009 35.993106973290051 1.596930000000000
138 1 27.174670000000009 37.838907330894669 0.000000000000000
139 2 28.773180000000010 38.761807509696978 -1.596930000000000
140 3 28.773180000000010 38.761807509696978 1.596930000000000
141 1 28.773180000000010 40.607607867301596 0.000000000000000
142 2 30.371690000000011 41.530508046103905 -1.596930000000000
143 3 30.371690000000011 41.530508046103905 1.596930000000000
144 1 30.371690000000011 43.376308403708523 0.000000000000000
145 2 9.591060000000003 0.000000000000000 -1.596930000000000
146 3 9.591060000000003 0.000000000000000 1.596930000000000
147 1 9.591060000000003 1.845800357604618 0.000000000000000
148 2 11.189570000000004 2.768700536406927 -1.596930000000000
149 3 11.189570000000004 2.768700536406927 1.596930000000000
150 1 11.189570000000004 4.614500894011545 0.000000000000000
151 2 12.788080000000004 5.537401072813854 -1.596930000000000
152 3 12.788080000000004 5.537401072813854 1.596930000000000
153 1 12.788080000000004 7.383201430418472 0.000000000000000
154 2 14.386590000000005 8.306101609220781 -1.596930000000000
155 3 14.386590000000005 8.306101609220781 1.596930000000000
156 1 14.386590000000005 10.151901966825399 0.000000000000000
157 2 15.985100000000006 11.074802145627708 -1.596930000000000
158 3 15.985100000000006 11.074802145627708 1.596930000000000
159 1 15.985100000000006 12.920602503232326 0.000000000000000
160 2 17.583610000000006 13.843502682034635 -1.596930000000000
161 3 17.583610000000006 13.843502682034635 1.596930000000000
162 1 17.583610000000006 15.689303039639253 0.000000000000000
163 2 19.182120000000007 16.612203218441562 -1.596930000000000
164 3 19.182120000000007 16.612203218441562 1.596930000000000
165 1 19.182120000000007 18.458003576046180 0.000000000000000
166 2 20.780630000000007 19.380903754848489 -1.596930000000000
167 3 20.780630000000007 19.380903754848489 1.596930000000000
168 1 20.780630000000007 21.226704112453107 0.000000000000000
169 2 22.379140000000008 22.149604291255416 -1.596930000000000
170 3 22.379140000000008 22.149604291255416 1.596930000000000
171 1 22.379140000000008 23.995404648860034 0.000000000000000
172 2 23.977650000000008 24.918304827662343 -1.596930000000000
173 3 23.977650000000008 24.918304827662343 1.596930000000000
174 1 23.977650000000008 26.764105185266961 0.000000000000000
175 2 25.576160000000009 27.687005364069270 -1.596930000000000
176 3 25.576160000000009 27.687005364069270 1.596930000000000
177 1 25.576160000000009 29.532805721673888 0.000000000000000
178 2 27.174670000000009 30.455705900476197 -1.596930000000000
179 3 27.174670000000009 30.455705900476197 1.596930000000000
180 1 27.174670000000009 32.301506258080815 0.000000000000000
181 2 28.773180000000010 33.224406436883124 -1.596930000000000
182 3 28.773180000000010 33.224406436883124 1.596930000000000
183 1 28.773180000000010 35.070206794487742 0.000000000000000
184 2 30.371690000000011 35.993106973290051 -1.596930000000000
185 3 30.371690000000011 35.993106973290051 1.596930000000000
186 1 30.371690000000011 37.838907330894669 0.000000000000000
187 2 31.970200000000011 38.761807509696978 -1.596930000000000
188 3 31.970200000000011 38.761807509696978 1.596930000000000
189 1 31.970200000000011 40.607607867301596 0.000000000000000
190 2 33.568710000000012 41.530508046103905 -1.596930000000000
191 3 33.568710000000012 41.530508046103905 1.596930000000000
192 1 33.568710000000012 43.376308403708523 0.000000000000000
193 2 12.788080000000004 0.000000000000000 -1.596930000000000
194 3 12.788080000000004 0.000000000000000 1.596930000000000
195 1 12.788080000000004 1.845800357604618 0.000000000000000
196 2 14.386590000000005 2.768700536406927 -1.596930000000000
197 3 14.386590000000005 2.768700536406927 1.596930000000000
198 1 14.386590000000005 4.614500894011545 0.000000000000000
199 2 15.985100000000006 5.537401072813854 -1.596930000000000
200 3 15.985100000000006 5.537401072813854 1.596930000000000
201 1 15.985100000000006 7.383201430418472 0.000000000000000
202 2 17.583610000000006 8.306101609220781 -1.596930000000000
203 3 17.583610000000006 8.306101609220781 1.596930000000000
204 1 17.583610000000006 10.151901966825399 0.000000000000000
205 2 19.182120000000007 11.074802145627708 -1.596930000000000
206 3 19.182120000000007 11.074802145627708 1.596930000000000
207 1 19.182120000000007 12.920602503232326 0.000000000000000
208 2 20.780630000000007 13.843502682034635 -1.596930000000000
209 3 20.780630000000007 13.843502682034635 1.596930000000000
210 1 20.780630000000007 15.689303039639253 0.000000000000000
211 2 22.379140000000008 16.612203218441562 -1.596930000000000
212 3 22.379140000000008 16.612203218441562 1.596930000000000
213 1 22.379140000000008 18.458003576046180 0.000000000000000
214 2 23.977650000000008 19.380903754848489 -1.596930000000000
215 3 23.977650000000008 19.380903754848489 1.596930000000000
216 1 23.977650000000008 21.226704112453107 0.000000000000000
217 2 25.576160000000009 22.149604291255416 -1.596930000000000
218 3 25.576160000000009 22.149604291255416 1.596930000000000
219 1 25.576160000000009 23.995404648860034 0.000000000000000
220 2 27.174670000000009 24.918304827662343 -1.596930000000000
221 3 27.174670000000009 24.918304827662343 1.596930000000000
222 1 27.174670000000009 26.764105185266961 0.000000000000000
223 2 28.773180000000010 27.687005364069270 -1.596930000000000
224 3 28.773180000000010 27.687005364069270 1.596930000000000
225 1 28.773180000000010 29.532805721673888 0.000000000000000
226 2 30.371690000000011 30.455705900476197 -1.596930000000000
227 3 30.371690000000011 30.455705900476197 1.596930000000000
228 1 30.371690000000011 32.301506258080815 0.000000000000000
229 2 31.970200000000011 33.224406436883124 -1.596930000000000
230 3 31.970200000000011 33.224406436883124 1.596930000000000
231 1 31.970200000000011 35.070206794487742 0.000000000000000
232 2 33.568710000000012 35.993106973290051 -1.596930000000000
233 3 33.568710000000012 35.993106973290051 1.596930000000000
234 1 33.568710000000012 37.838907330894669 0.000000000000000
235 2 35.167220000000012 38.761807509696978 -1.596930000000000
236 3 35.167220000000012 38.761807509696978 1.596930000000000
237 1 35.167220000000012 40.607607867301596 0.000000000000000
238 2 36.765730000000013 41.530508046103905 -1.596930000000000
239 3 36.765730000000013 41.530508046103905 1.596930000000000
240 1 36.765730000000013 43.376308403708523 0.000000000000000
241 2 15.985100000000006 0.000000000000000 -1.596930000000000
242 3 15.985100000000006 0.000000000000000 1.596930000000000
243 1 15.985100000000006 1.845800357604618 0.000000000000000
244 2 17.583610000000006 2.768700536406927 -1.596930000000000
245 3 17.583610000000006 2.768700536406927 1.596930000000000
246 1 17.583610000000006 4.614500894011545 0.000000000000000
247 2 19.182120000000007 5.537401072813854 -1.596930000000000
248 3 19.182120000000007 5.537401072813854 1.596930000000000
249 1 19.182120000000007 7.383201430418472 0.000000000000000
250 2 20.780630000000007 8.306101609220781 -1.596930000000000
251 3 20.780630000000007 8.306101609220781 1.596930000000000
252 1 20.780630000000007 10.151901966825399 0.000000000000000
253 2 22.379140000000008 11.074802145627708 -1.596930000000000
254 3 22.379140000000008 11.074802145627708 1.596930000000000
255 1 22.379140000000008 12.920602503232326 0.000000000000000
256 2 23.977650000000008 13.843502682034635 -1.596930000000000
257 3 23.977650000000008 13.843502682034635 1.596930000000000
258 1 23.977650000000008 15.689303039639253 0.000000000000000
259 2 25.576160000000009 16.612203218441562 -1.596930000000000
260 3 25.576160000000009 16.612203218441562 1.596930000000000
261 1 25.576160000000009 18.458003576046180 0.000000000000000
262 2 27.174670000000009 19.380903754848489 -1.596930000000000
263 3 27.174670000000009 19.380903754848489 1.596930000000000
264 1 27.174670000000009 21.226704112453107 0.000000000000000
265 2 28.773180000000010 22.149604291255416 -1.596930000000000
266 3 28.773180000000010 22.149604291255416 1.596930000000000
267 1 28.773180000000010 23.995404648860034 0.000000000000000
268 2 30.371690000000011 24.918304827662343 -1.596930000000000
269 3 30.371690000000011 24.918304827662343 1.596930000000000
270 1 30.371690000000011 26.764105185266961 0.000000000000000
271 2 31.970200000000011 27.687005364069270 -1.596930000000000
272 3 31.970200000000011 27.687005364069270 1.596930000000000
273 1 31.970200000000011 29.532805721673888 0.000000000000000
274 2 33.568710000000012 30.455705900476197 -1.596930000000000
275 3 33.568710000000012 30.455705900476197 1.596930000000000
276 1 33.568710000000012 32.301506258080815 0.000000000000000
277 2 35.167220000000012 33.224406436883124 -1.596930000000000
278 3 35.167220000000012 33.224406436883124 1.596930000000000
279 1 35.167220000000012 35.070206794487742 0.000000000000000
280 2 36.765730000000013 35.993106973290051 -1.596930000000000
281 3 36.765730000000013 35.993106973290051 1.596930000000000
282 1 36.765730000000013 37.838907330894669 0.000000000000000
283 2 38.364240000000013 38.761807509696978 -1.596930000000000
284 3 38.364240000000013 38.761807509696978 1.596930000000000
285 1 38.364240000000013 40.607607867301596 0.000000000000000
286 2 39.962750000000014 41.530508046103905 -1.596930000000000
287 3 39.962750000000014 41.530508046103905 1.596930000000000
288 1 39.962750000000014 43.376308403708523 0.000000000000000
289 2 19.182120000000007 0.000000000000000 -1.596930000000000
290 3 19.182120000000007 0.000000000000000 1.596930000000000
291 1 19.182120000000007 1.845800357604618 0.000000000000000
292 2 20.780630000000007 2.768700536406927 -1.596930000000000
293 3 20.780630000000007 2.768700536406927 1.596930000000000
294 1 20.780630000000007 4.614500894011545 0.000000000000000
295 2 22.379140000000008 5.537401072813854 -1.596930000000000
296 3 22.379140000000008 5.537401072813854 1.596930000000000
297 1 22.379140000000008 7.383201430418472 0.000000000000000
298 2 23.977650000000008 8.306101609220781 -1.596930000000000
299 3 23.977650000000008 8.306101609220781 1.596930000000000
300 1 23.977650000000008 10.151901966825399 0.000000000000000
301 2 25.576160000000009 11.074802145627708 -1.596930000000000
302 3 25.576160000000009 11.074802145627708 1.596930000000000
303 1 25.576160000000009 12.920602503232326 0.000000000000000
304 2 27.174670000000009 13.843502682034635 -1.596930000000000
305 3 27.174670000000009 13.843502682034635 1.596930000000000
306 1 27.174670000000009 15.689303039639253 0.000000000000000
307 2 28.773180000000010 16.612203218441562 -1.596930000000000
308 3 28.773180000000010 16.612203218441562 1.596930000000000
309 1 28.773180000000010 18.458003576046180 0.000000000000000
310 2 30.371690000000011 19.380903754848489 -1.596930000000000
311 3 30.371690000000011 19.380903754848489 1.596930000000000
312 1 30.371690000000011 21.226704112453107 0.000000000000000
313 2 31.970200000000011 22.149604291255416 -1.596930000000000
314 3 31.970200000000011 22.149604291255416 1.596930000000000
315 1 31.970200000000011 23.995404648860034 0.000000000000000
316 2 33.568710000000012 24.918304827662343 -1.596930000000000
317 3 33.568710000000012 24.918304827662343 1.596930000000000
318 1 33.568710000000012 26.764105185266961 0.000000000000000
319 2 35.167220000000012 27.687005364069270 -1.596930000000000
320 3 35.167220000000012 27.687005364069270 1.596930000000000
321 1 35.167220000000012 29.532805721673888 0.000000000000000
322 2 36.765730000000013 30.455705900476197 -1.596930000000000
323 3 36.765730000000013 30.455705900476197 1.596930000000000
324 1 36.765730000000013 32.301506258080815 0.000000000000000
325 2 38.364240000000013 33.224406436883124 -1.596930000000000
326 3 38.364240000000013 33.224406436883124 1.596930000000000
327 1 38.364240000000013 35.070206794487742 0.000000000000000
328 2 39.962750000000014 35.993106973290051 -1.596930000000000
329 3 39.962750000000014 35.993106973290051 1.596930000000000
330 1 39.962750000000014 37.838907330894669 0.000000000000000
331 2 41.561260000000014 38.761807509696978 -1.596930000000000
332 3 41.561260000000014 38.761807509696978 1.596930000000000
333 1 41.561260000000014 40.607607867301596 0.000000000000000
334 2 43.159770000000015 41.530508046103905 -1.596930000000000
335 3 43.159770000000015 41.530508046103905 1.596930000000000
336 1 43.159770000000015 43.376308403708523 0.000000000000000
337 2 22.379140000000008 0.000000000000000 -1.596930000000000
338 3 22.379140000000008 0.000000000000000 1.596930000000000
339 1 22.379140000000008 1.845800357604618 0.000000000000000
340 2 23.977650000000008 2.768700536406927 -1.596930000000000
341 3 23.977650000000008 2.768700536406927 1.596930000000000
342 1 23.977650000000008 4.614500894011545 0.000000000000000
343 2 25.576160000000009 5.537401072813854 -1.596930000000000
344 3 25.576160000000009 5.537401072813854 1.596930000000000
345 1 25.576160000000009 7.383201430418472 0.000000000000000
346 2 27.174670000000009 8.306101609220781 -1.596930000000000
347 3 27.174670000000009 8.306101609220781 1.596930000000000
348 1 27.174670000000009 10.151901966825399 0.000000000000000
349 2 28.773180000000010 11.074802145627708 -1.596930000000000
350 3 28.773180000000010 11.074802145627708 1.596930000000000
351 1 28.773180000000010 12.920602503232326 0.000000000000000
352 2 30.371690000000011 13.843502682034635 -1.596930000000000
353 3 30.371690000000011 13.843502682034635 1.596930000000000
354 1 30.371690000000011 15.689303039639253 0.000000000000000
355 2 31.970200000000011 16.612203218441562 -1.596930000000000
356 3 31.970200000000011 16.612203218441562 1.596930000000000
357 1 31.970200000000011 18.458003576046180 0.000000000000000
358 2 33.568710000000012 19.380903754848489 -1.596930000000000
359 3 33.568710000000012 19.380903754848489 1.596930000000000
360 1 33.568710000000012 21.226704112453107 0.000000000000000
361 2 35.167220000000012 22.149604291255416 -1.596930000000000
362 3 35.167220000000012 22.149604291255416 1.596930000000000
363 1 35.167220000000012 23.995404648860034 0.000000000000000
364 2 36.765730000000013 24.918304827662343 -1.596930000000000
365 3 36.765730000000013 24.918304827662343 1.596930000000000
366 1 36.765730000000013 26.764105185266961 0.000000000000000
367 2 38.364240000000013 27.687005364069270 -1.596930000000000
368 3 38.364240000000013 27.687005364069270 1.596930000000000
369 1 38.364240000000013 29.532805721673888 0.000000000000000
370 2 39.962750000000014 30.455705900476197 -1.596930000000000
371 3 39.962750000000014 30.455705900476197 1.596930000000000
372 1 39.962750000000014 32.301506258080815 0.000000000000000
373 2 41.561260000000014 33.224406436883124 -1.596930000000000
374 3 41.561260000000014 33.224406436883124 1.596930000000000
375 1 41.561260000000014 35.070206794487742 0.000000000000000
376 2 43.159770000000015 35.993106973290051 -1.596930000000000
377 3 43.159770000000015 35.993106973290051 1.596930000000000
378 1 43.159770000000015 37.838907330894669 0.000000000000000
379 2 44.758280000000015 38.761807509696978 -1.596930000000000
380 3 44.758280000000015 38.761807509696978 1.596930000000000
381 1 44.758280000000015 40.607607867301596 0.000000000000000
382 2 46.356790000000016 41.530508046103905 -1.596930000000000
383 3 46.356790000000016 41.530508046103905 1.596930000000000
384 1 46.356790000000016 43.376308403708523 0.000000000000000
385 2 25.576160000000009 0.000000000000000 -1.596930000000000
386 3 25.576160000000009 0.000000000000000 1.596930000000000
387 1 25.576160000000009 1.845800357604618 0.000000000000000
388 2 27.174670000000009 2.768700536406927 -1.596930000000000
389 3 27.174670000000009 2.768700536406927 1.596930000000000
390 1 27.174670000000009 4.614500894011545 0.000000000000000
391 2 28.773180000000010 5.537401072813854 -1.596930000000000
392 3 28.773180000000010 5.537401072813854 1.596930000000000
393 1 28.773180000000010 7.383201430418472 0.000000000000000
394 2 30.371690000000011 8.306101609220781 -1.596930000000000
395 3 30.371690000000011 8.306101609220781 1.596930000000000
396 1 30.371690000000011 10.151901966825399 0.000000000000000
397 2 31.970200000000011 11.074802145627708 -1.596930000000000
398 3 31.970200000000011 11.074802145627708 1.596930000000000
399 1 31.970200000000011 12.920602503232326 0.000000000000000
400 2 33.568710000000012 13.843502682034635 -1.596930000000000
401 3 33.568710000000012 13.843502682034635 1.596930000000000
402 1 33.568710000000012 15.689303039639253 0.000000000000000
403 2 35.167220000000012 16.612203218441562 -1.596930000000000
404 3 35.167220000000012 16.612203218441562 1.596930000000000
405 1 35.167220000000012 18.458003576046180 0.000000000000000
406 2 36.765730000000013 19.380903754848489 -1.596930000000000
407 3 36.765730000000013 19.380903754848489 1.596930000000000
408 1 36.765730000000013 21.226704112453107 0.000000000000000
409 2 38.364240000000013 22.149604291255416 -1.596930000000000
410 3 38.364240000000013 22.149604291255416 1.596930000000000
411 1 38.364240000000013 23.995404648860034 0.000000000000000
412 2 39.962750000000014 24.918304827662343 -1.596930000000000
413 3 39.962750000000014 24.918304827662343 1.596930000000000
414 1 39.962750000000014 26.764105185266961 0.000000000000000
415 2 41.561260000000014 27.687005364069270 -1.596930000000000
416 3 41.561260000000014 27.687005364069270 1.596930000000000
417 1 41.561260000000014 29.532805721673888 0.000000000000000
418 2 43.159770000000015 30.455705900476197 -1.596930000000000
419 3 43.159770000000015 30.455705900476197 1.596930000000000
420 1 43.159770000000015 32.301506258080815 0.000000000000000
421 2 44.758280000000015 33.224406436883124 -1.596930000000000
422 3 44.758280000000015 33.224406436883124 1.596930000000000
423 1 44.758280000000015 35.070206794487742 0.000000000000000
424 2 46.356790000000016 35.993106973290051 -1.596930000000000
425 3 46.356790000000016 35.993106973290051 1.596930000000000
426 1 46.356790000000016 37.838907330894669 0.000000000000000
427 2 47.955300000000017 38.761807509696978 -1.596930000000000
428 3 47.955300000000017 38.761807509696978 1.596930000000000
429 1 47.955300000000017 40.607607867301596 0.000000000000000
430 2 49.553810000000017 41.530508046103905 -1.596930000000000
431 3 49.553810000000017 41.530508046103905 1.596930000000000
432 1 49.553810000000017 43.376308403708523 0.000000000000000
433 2 28.773180000000010 0.000000000000000 -1.596930000000000
434 3 28.773180000000010 0.000000000000000 1.596930000000000
435 1 28.773180000000010 1.845800357604618 0.000000000000000
436 2 30.371690000000011 2.768700536406927 -1.596930000000000
437 3 30.371690000000011 2.768700536406927 1.596930000000000
438 1 30.371690000000011 4.614500894011545 0.000000000000000
439 2 31.970200000000011 5.537401072813854 -1.596930000000000
440 3 31.970200000000011 5.537401072813854 1.596930000000000
441 1 31.970200000000011 7.383201430418472 0.000000000000000
442 2 33.568710000000012 8.306101609220781 -1.596930000000000
443 3 33.568710000000012 8.306101609220781 1.596930000000000
444 1 33.568710000000012 10.151901966825399 0.000000000000000
445 2 35.167220000000012 11.074802145627708 -1.596930000000000
446 3 35.167220000000012 11.074802145627708 1.596930000000000
447 1 35.167220000000012 12.920602503232326 0.000000000000000
448 2 36.765730000000013 13.843502682034635 -1.596930000000000
449 3 36.765730000000013 13.843502682034635 1.596930000000000
450 1 36.765730000000013 15.689303039639253 0.000000000000000
451 2 38.364240000000013 16.612203218441562 -1.596930000000000
452 3 38.364240000000013 16.612203218441562 1.596930000000000
453 1 38.364240000000013 18.458003576046180 0.000000000000000
454 2 39.962750000000014 19.380903754848489 -1.596930000000000
455 3 39.962750000000014 19.380903754848489 1.596930000000000
456 1 39.962750000000014 21.226704112453107 0.000000000000000
457 2 41.561260000000014 22.149604291255416 -1.596930000000000
458 3 41.561260000000014 22.149604291255416 1.596930000000000
459 1 41.561260000000014 23.995404648860034 0.000000000000000
460 2 43.159770000000015 24.918304827662343 -1.596930000000000
461 3 43.159770000000015 24.918304827662343 1.596930000000000
462 1 43.159770000000015 26.764105185266961 0.000000000000000
463 2 44.758280000000015 27.687005364069270 -1.596930000000000
464 3 44.758280000000015 27.687005364069270 1.596930000000000
465 1 44.758280000000015 29.532805721673888 0.000000000000000
466 2 46.356790000000016 30.455705900476197 -1.596930000000000
467 3 46.356790000000016 30.455705900476197 1.596930000000000
468 1 46.356790000000016 32.301506258080815 0.000000000000000
469 2 47.955300000000017 33.224406436883124 -1.596930000000000
470 3 47.955300000000017 33.224406436883124 1.596930000000000
471 1 47.955300000000017 35.070206794487742 0.000000000000000
472 2 49.553810000000017 35.993106973290051 -1.596930000000000
473 3 49.553810000000017 35.993106973290051 1.596930000000000
474 1 49.553810000000017 37.838907330894669 0.000000000000000
475 2 51.152320000000018 38.761807509696978 -1.596930000000000
476 3 51.152320000000018 38.761807509696978 1.596930000000000
477 1 51.152320000000018 40.607607867301596 0.000000000000000
478 2 52.750830000000018 41.530508046103905 -1.596930000000000
479 3 52.750830000000018 41.530508046103905 1.596930000000000
480 1 52.750830000000018 43.376308403708523 0.000000000000000
481 2 31.970200000000011 0.000000000000000 -1.596930000000000
482 3 31.970200000000011 0.000000000000000 1.596930000000000
483 1 31.970200000000011 1.845800357604618 0.000000000000000
484 2 33.568710000000012 2.768700536406927 -1.596930000000000
485 3 33.568710000000012 2.768700536406927 1.596930000000000
486 1 33.568710000000012 4.614500894011545 0.000000000000000
487 2 35.167220000000012 5.537401072813854 -1.596930000000000
488 3 35.167220000000012 5.537401072813854 1.596930000000000
489 1 35.167220000000012 7.383201430418472 0.000000000000000
490 2 36.765730000000013 8.306101609220781 -1.596930000000000
491 3 36.765730000000013 8.306101609220781 1.596930000000000
492 1 36.765730000000013 10.151901966825399 0.000000000000000
493 2 38.364240000000013 11.074802145627708 -1.596930000000000
494 3 38.364240000000013 11.074802145627708 1.596930000000000
495 1 38.364240000000013 12.920602503232326 0.000000000000000
496 2 39.962750000000014 13.843502682034635 -1.596930000000000
497 3 39.962750000000014 13.843502682034635 1.596930000000000
498 1 39.962750000000014 15.689303039639253 0.000000000000000
499 2 41.561260000000014 16.612203218441562 -1.596930000000000
500 3 41.561260000000014 16.612203218441562 1.596930000000000
501 1 41.561260000000014 18.458003576046180 0.000000000000000
502 2 43.159770000000015 19.380903754848489 -1.596930000000000
503 3 43.159770000000015 19.380903754848489 1.596930000000000
504 1 43.159770000000015 21.226704112453107 0.000000000000000
505 2 44.758280000000015 22.149604291255416 -1.596930000000000
506 3 44.758280000000015 22.149604291255416 1.596930000000000
507 1 44.758280000000015 23.995404648860034 0.000000000000000
508 2 46.356790000000016 24.918304827662343 -1.596930000000000
509 3 46.356790000000016 24.918304827662343 1.596930000000000
510 1 46.356790000000016 26.764105185266961 0.000000000000000
511 2 47.955300000000017 27.687005364069270 -1.596930000000000
512 3 47.955300000000017 27.687005364069270 1.596930000000000
513 1 47.955300000000017 29.532805721673888 0.000000000000000
514 2 49.553810000000017 30.455705900476197 -1.596930000000000
515 3 49.553810000000017 30.455705900476197 1.596930000000000
516 1 49.553810000000017 32.301506258080815 0.000000000000000
517 2 51.152320000000018 33.224406436883124 -1.596930000000000
518 3 51.152320000000018 33.224406436883124 1.596930000000000
519 1 51.152320000000018 35.070206794487742 0.000000000000000
520 2 52.750830000000018 35.993106973290051 -1.596930000000000
521 3 52.750830000000018 35.993106973290051 1.596930000000000
522 1 52.750830000000018 37.838907330894669 0.000000000000000
523 2 54.349340000000019 38.761807509696978 -1.596930000000000
524 3 54.349340000000019 38.761807509696978 1.596930000000000
525 1 54.349340000000019 40.607607867301596 0.000000000000000
526 2 55.947850000000019 41.530508046103905 -1.596930000000000
527 3 55.947850000000019 41.530508046103905 1.596930000000000
528 1 55.947850000000019 43.376308403708523 0.000000000000000
529 2 35.167220000000012 0.000000000000000 -1.596930000000000
530 3 35.167220000000012 0.000000000000000 1.596930000000000
531 1 35.167220000000012 1.845800357604618 0.000000000000000
532 2 36.765730000000013 2.768700536406927 -1.596930000000000
533 3 36.765730000000013 2.768700536406927 1.596930000000000
534 1 36.765730000000013 4.614500894011545 0.000000000000000
535 2 38.364240000000013 5.537401072813854 -1.596930000000000
536 3 38.364240000000013 5.537401072813854 1.596930000000000
537 1 38.364240000000013 7.383201430418472 0.000000000000000
538 2 39.962750000000014 8.306101609220781 -1.596930000000000
539 3 39.962750000000014 8.306101609220781 1.596930000000000
540 1 39.962750000000014 10.151901966825399 0.000000000000000
541 2 41.561260000000014 11.074802145627708 -1.596930000000000
542 3 41.561260000000014 11.074802145627708 1.596930000000000
543 1 41.561260000000014 12.920602503232326 0.000000000000000
544 2 43.159770000000015 13.843502682034635 -1.596930000000000
545 3 43.159770000000015 13.843502682034635 1.596930000000000
546 1 43.159770000000015 15.689303039639253 0.000000000000000
547 2 44.758280000000015 16.612203218441562 -1.596930000000000
548 3 44.758280000000015 16.612203218441562 1.596930000000000
549 1 44.758280000000015 18.458003576046180 0.000000000000000
550 2 46.356790000000016 19.380903754848489 -1.596930000000000
551 3 46.356790000000016 19.380903754848489 1.596930000000000
552 1 46.356790000000016 21.226704112453107 0.000000000000000
553 2 47.955300000000017 22.149604291255416 -1.596930000000000
554 3 47.955300000000017 22.149604291255416 1.596930000000000
555 1 47.955300000000017 23.995404648860034 0.000000000000000
556 2 49.553810000000017 24.918304827662343 -1.596930000000000
557 3 49.553810000000017 24.918304827662343 1.596930000000000
558 1 49.553810000000017 26.764105185266961 0.000000000000000
559 2 51.152320000000018 27.687005364069270 -1.596930000000000
560 3 51.152320000000018 27.687005364069270 1.596930000000000
561 1 51.152320000000018 29.532805721673888 0.000000000000000
562 2 52.750830000000018 30.455705900476197 -1.596930000000000
563 3 52.750830000000018 30.455705900476197 1.596930000000000
564 1 52.750830000000018 32.301506258080815 0.000000000000000
565 2 54.349340000000019 33.224406436883124 -1.596930000000000
566 3 54.349340000000019 33.224406436883124 1.596930000000000
567 1 54.349340000000019 35.070206794487742 0.000000000000000
568 2 55.947850000000019 35.993106973290051 -1.596930000000000
569 3 55.947850000000019 35.993106973290051 1.596930000000000
570 1 55.947850000000019 37.838907330894669 0.000000000000000
571 2 57.546360000000020 38.761807509696978 -1.596930000000000
572 3 57.546360000000020 38.761807509696978 1.596930000000000
573 1 57.546360000000020 40.607607867301596 0.000000000000000
574 2 59.144870000000020 41.530508046103905 -1.596930000000000
575 3 59.144870000000020 41.530508046103905 1.596930000000000
576 1 59.144870000000020 43.376308403708523 0.000000000000000
577 2 38.364240000000013 0.000000000000000 -1.596930000000000
578 3 38.364240000000013 0.000000000000000 1.596930000000000
579 1 38.364240000000013 1.845800357604618 0.000000000000000
580 2 39.962750000000014 2.768700536406927 -1.596930000000000
581 3 39.962750000000014 2.768700536406927 1.596930000000000
582 1 39.962750000000014 4.614500894011545 0.000000000000000
583 2 41.561260000000014 5.537401072813854 -1.596930000000000
584 3 41.561260000000014 5.537401072813854 1.596930000000000
585 1 41.561260000000014 7.383201430418472 0.000000000000000
586 2 43.159770000000015 8.306101609220781 -1.596930000000000
587 3 43.159770000000015 8.306101609220781 1.596930000000000
588 1 43.159770000000015 10.151901966825399 0.000000000000000
589 2 44.758280000000015 11.074802145627708 -1.596930000000000
590 3 44.758280000000015 11.074802145627708 1.596930000000000
591 1 44.758280000000015 12.920602503232326 0.000000000000000
592 2 46.356790000000016 13.843502682034635 -1.596930000000000
593 3 46.356790000000016 13.843502682034635 1.596930000000000
594 1 46.356790000000016 15.689303039639253 0.000000000000000
595 2 47.955300000000017 16.612203218441562 -1.596930000000000
596 3 47.955300000000017 16.612203218441562 1.596930000000000
597 1 47.955300000000017 18.458003576046180 0.000000000000000
598 2 49.553810000000017 19.380903754848489 -1.596930000000000
599 3 49.553810000000017 19.380903754848489 1.596930000000000
600 1 49.553810000000017 21.226704112453107 0.000000000000000
601 2 51.152320000000018 22.149604291255416 -1.596930000000000
602 3 51.152320000000018 22.149604291255416 1.596930000000000
603 1 51.152320000000018 23.995404648860034 0.000000000000000
604 2 52.750830000000018 24.918304827662343 -1.596930000000000
605 3 52.750830000000018 24.918304827662343 1.596930000000000
606 1 52.750830000000018 26.764105185266961 0.000000000000000
607 2 54.349340000000019 27.687005364069270 -1.596930000000000
608 3 54.349340000000019 27.687005364069270 1.596930000000000
609 1 54.349340000000019 29.532805721673888 0.000000000000000
610 2 55.947850000000019 30.455705900476197 -1.596930000000000
611 3 55.947850000000019 30.455705900476197 1.596930000000000
612 1 55.947850000000019 32.301506258080815 0.000000000000000
613 2 57.546360000000020 33.224406436883124 -1.596930000000000
614 3 57.546360000000020 33.224406436883124 1.596930000000000
615 1 57.546360000000020 35.070206794487742 0.000000000000000
616 2 59.144870000000020 35.993106973290051 -1.596930000000000
617 3 59.144870000000020 35.993106973290051 1.596930000000000
618 1 59.144870000000020 37.838907330894669 0.000000000000000
619 2 60.743380000000021 38.761807509696978 -1.596930000000000
620 3 60.743380000000021 38.761807509696978 1.596930000000000
621 1 60.743380000000021 40.607607867301596 0.000000000000000
622 2 62.341890000000022 41.530508046103905 -1.596930000000000
623 3 62.341890000000022 41.530508046103905 1.596930000000000
624 1 62.341890000000022 43.376308403708523 0.000000000000000
625 2 41.561260000000014 0.000000000000000 -1.596930000000000
626 3 41.561260000000014 0.000000000000000 1.596930000000000
627 1 41.561260000000014 1.845800357604618 0.000000000000000
628 2 43.159770000000015 2.768700536406927 -1.596930000000000
629 3 43.159770000000015 2.768700536406927 1.596930000000000
630 1 43.159770000000015 4.614500894011545 0.000000000000000
631 2 44.758280000000015 5.537401072813854 -1.596930000000000
632 3 44.758280000000015 5.537401072813854 1.596930000000000
633 1 44.758280000000015 7.383201430418472 0.000000000000000
634 2 46.356790000000016 8.306101609220781 -1.596930000000000
635 3 46.356790000000016 8.306101609220781 1.596930000000000
636 1 46.356790000000016 10.151901966825399 0.000000000000000
637 2 47.955300000000017 11.074802145627708 -1.596930000000000
638 3 47.955300000000017 11.074802145627708 1.596930000000000
639 1 47.955300000000017 12.920602503232326 0.000000000000000
640 2 49.553810000000017 13.843502682034635 -1.596930000000000
641 3 49.553810000000017 13.843502682034635 1.596930000000000
642 1 49.553810000000017 15.689303039639253 0.000000000000000
643 2 51.152320000000018 16.612203218441562 -1.596930000000000
644 3 51.152320000000018 16.612203218441562 1.596930000000000
645 1 51.152320000000018 18.458003576046180 0.000000000000000
646 2 52.750830000000018 19.380903754848489 -1.596930000000000
647 3 52.750830000000018 19.380903754848489 1.596930000000000
648 1 52.750830000000018 21.226704112453107 0.000000000000000
649 2 54.349340000000019 22.149604291255416 -1.596930000000000
650 3 54.349340000000019 22.149604291255416 1.596930000000000
651 1 54.349340000000019 23.995404648860034 0.000000000000000
652 2 55.947850000000019 24.918304827662343 -1.596930000000000
653 3 55.947850000000019 24.918304827662343 1.596930000000000
654 1 55.947850000000019 26.764105185266961 0.000000000000000
655 2 57.546360000000020 27.687005364069270 -1.596930000000000
656 3 57.546360000000020 27.687005364069270 1.596930000000000
657 1 57.546360000000020 29.532805721673888 0.000000000000000
658 2 59.144870000000020 30.455705900476197 -1.596930000000000
659 3 59.144870000000020 30.455705900476197 1.596930000000000
660 1 59.144870000000020 32.301506258080815 0.000000000000000
661 2 60.743380000000021 33.224406436883124 -1.596930000000000
662 3 60.743380000000021 33.224406436883124 1.596930000000000
663 1 60.743380000000021 35.070206794487742 0.000000000000000
664 2 62.341890000000022 35.993106973290051 -1.596930000000000
665 3 62.341890000000022 35.993106973290051 1.596930000000000
666 1 62.341890000000022 37.838907330894669 0.000000000000000
667 2 63.940400000000022 38.761807509696978 -1.596930000000000
668 3 63.940400000000022 38.761807509696978 1.596930000000000
669 1 63.940400000000022 40.607607867301596 0.000000000000000
670 2 65.538910000000023 41.530508046103905 -1.596930000000000
671 3 65.538910000000023 41.530508046103905 1.596930000000000
672 1 65.538910000000023 43.376308403708523 0.000000000000000
673 2 44.758280000000015 0.000000000000000 -1.596930000000000
674 3 44.758280000000015 0.000000000000000 1.596930000000000
675 1 44.758280000000015 1.845800357604618 0.000000000000000
676 2 46.356790000000016 2.768700536406927 -1.596930000000000
677 3 46.356790000000016 2.768700536406927 1.596930000000000
678 1 46.356790000000016 4.614500894011545 0.000000000000000
679 2 47.955300000000017 5.537401072813854 -1.596930000000000
680 3 47.955300000000017 5.537401072813854 1.596930000000000
681 1 47.955300000000017 7.383201430418472 0.000000000000000
682 2 49.553810000000017 8.306101609220781 -1.596930000000000
683 3 49.553810000000017 8.306101609220781 1.596930000000000
684 1 49.553810000000017 10.151901966825399 0.000000000000000
685 2 51.152320000000018 11.074802145627708 -1.596930000000000
686 3 51.152320000000018 11.074802145627708 1.596930000000000
687 1 51.152320000000018 12.920602503232326 0.000000000000000
688 2 52.750830000000018 13.843502682034635 -1.596930000000000
689 3 52.750830000000018 13.843502682034635 1.596930000000000
690 1 52.750830000000018 15.689303039639253 0.000000000000000
691 2 54.349340000000019 16.612203218441562 -1.596930000000000
692 3 54.349340000000019 16.612203218441562 1.596930000000000
693 1 54.349340000000019 18.458003576046180 0.000000000000000
694 2 55.947850000000019 19.380903754848489 -1.596930000000000
695 3 55.947850000000019 19.380903754848489 1.596930000000000
696 1 55.947850000000019 21.226704112453107 0.000000000000000
697 2 57.546360000000020 22.149604291255416 -1.596930000000000
698 3 57.546360000000020 22.149604291255416 1.596930000000000
699 1 57.546360000000020 23.995404648860034 0.000000000000000
700 2 59.144870000000020 24.918304827662343 -1.596930000000000
701 3 59.144870000000020 24.918304827662343 1.596930000000000
702 1 59.144870000000020 26.764105185266961 0.000000000000000
703 2 60.743380000000021 27.687005364069270 -1.596930000000000
704 3 60.743380000000021 27.687005364069270 1.596930000000000
705 1 60.743380000000021 29.532805721673888 0.000000000000000
706 2 62.341890000000022 30.455705900476197 -1.596930000000000
707 3 62.341890000000022 30.455705900476197 1.596930000000000
708 1 62.341890000000022 32.301506258080815 0.000000000000000
709 2 63.940400000000022 33.224406436883124 -1.596930000000000
710 3 63.940400000000022 33.224406436883124 1.596930000000000
711 1 63.940400000000022 35.070206794487742 0.000000000000000
712 2 65.538910000000023 35.993106973290051 -1.596930000000000
713 3 65.538910000000023 35.993106973290051 1.596930000000000
714 1 65.538910000000023 37.838907330894669 0.000000000000000
715 2 67.137420000000023 38.761807509696978 -1.596930000000000
716 3 67.137420000000023 38.761807509696978 1.596930000000000
717 1 67.137420000000023 40.607607867301596 0.000000000000000
718 2 68.735930000000024 41.530508046103905 -1.596930000000000
719 3 68.735930000000024 41.530508046103905 1.596930000000000
720 1 68.735930000000024 43.376308403708523 0.000000000000000
721 2 47.955300000000017 0.000000000000000 -1.596930000000000
722 3 47.955300000000017 0.000000000000000 1.596930000000000
723 1 47.955300000000017 1.845800357604618 0.000000000000000
724 2 49.553810000000017 2.768700536406927 -1.596930000000000
725 3 49.553810000000017 2.768700536406927 1.596930000000000
726 1 49.553810000000017 4.614500894011545 0.000000000000000
727 2 51.152320000000018 5.537401072813854 -1.596930000000000
728 3 51.152320000000018 5.537401072813854 1.596930000000000
729 1 51.152320000000018 7.383201430418472 0.000000000000000
730 2 52.750830000000018 8.306101609220781 -1.596930000000000
731 3 52.750830000000018 8.306101609220781 1.596930000000000
732 1 52.750830000000018 10.151901966825399 0.000000000000000
733 2 54.349340000000019 11.074802145627708 -1.596930000000000
734 3 54.349340000000019 11.074802145627708 1.596930000000000
735 1 54.349340000000019 12.920602503232326 0.000000000000000
736 2 55.947850000000019 13.843502682034635 -1.596930000000000
737 3 55.947850000000019 13.843502682034635 1.596930000000000
738 1 55.947850000000019 15.689303039639253 0.000000000000000
739 2 57.546360000000020 16.612203218441562 -1.596930000000000
740 3 57.546360000000020 16.612203218441562 1.596930000000000
741 1 57.546360000000020 18.458003576046180 0.000000000000000
742 2 59.144870000000020 19.380903754848489 -1.596930000000000
743 3 59.144870000000020 19.380903754848489 1.596930000000000
744 1 59.144870000000020 21.226704112453107 0.000000000000000
745 2 60.743380000000021 22.149604291255416 -1.596930000000000
746 3 60.743380000000021 22.149604291255416 1.596930000000000
747 1 60.743380000000021 23.995404648860034 0.000000000000000
748 2 62.341890000000022 24.918304827662343 -1.596930000000000
749 3 62.341890000000022 24.918304827662343 1.596930000000000
750 1 62.341890000000022 26.764105185266961 0.000000000000000
751 2 63.940400000000022 27.687005364069270 -1.596930000000000
752 3 63.940400000000022 27.687005364069270 1.596930000000000
753 1 63.940400000000022 29.532805721673888 0.000000000000000
754 2 65.538910000000023 30.455705900476197 -1.596930000000000
755 3 65.538910000000023 30.455705900476197 1.596930000000000
756 1 65.538910000000023 32.301506258080815 0.000000000000000
757 2 67.137420000000023 33.224406436883124 -1.596930000000000
758 3 67.137420000000023 33.224406436883124 1.596930000000000
759 1 67.137420000000023 35.070206794487742 0.000000000000000
760 2 68.735930000000024 35.993106973290051 -1.596930000000000
761 3 68.735930000000024 35.993106973290051 1.596930000000000
762 1 68.735930000000024 37.838907330894669 0.000000000000000
763 2 70.334440000000024 38.761807509696978 -1.596930000000000
764 3 70.334440000000024 38.761807509696978 1.596930000000000
765 1 70.334440000000024 40.607607867301596 0.000000000000000
766 2 71.932950000000025 41.530508046103905 -1.596930000000000
767 3 71.932950000000025 41.530508046103905 1.596930000000000
768 1 71.932950000000025 43.376308403708523 0.000000000000000

View File

@ -0,0 +1 @@
../../potentials/tmd.sw.mod

View File

@ -17,6 +17,8 @@ parser = ArgumentParser(prog='Install.py',
# settings
CMAKE = os.environ.get('CMAKE') or 'cmake'
thisdir = fullpath('.')
version = "2.2.1"
@ -141,7 +143,7 @@ if buildflag:
# configure kim-api
print("Configuring kim-api ...")
cmd = 'cd "%s/kim-api-%s" && mkdir build && cd build && cmake .. -DCMAKE_INSTALL_PREFIX="%s" -DCMAKE_BUILD_TYPE=Release' % (thisdir,version,kimdir)
cmd = 'cd "%s/kim-api-%s" && mkdir build && cd build && %s .. -DCMAKE_INSTALL_PREFIX="%s" -DCMAKE_BUILD_TYPE=Release' % (thisdir,version,CMAKE,kimdir)
txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
if verboseflag: print(txt.decode("UTF-8"))

View File

@ -1,5 +1,165 @@
# Change Log
## [3.5.00](https://github.com/kokkos/kokkos/tree/3.5.00) (2021-10-19)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.4.01...3.5.00)
### Features:
- Add support for quad-precision math functions/traits [\#4098](https://github.com/kokkos/kokkos/pull/4098)
- Adding ExecutionSpace partitioning function [\#4096](https://github.com/kokkos/kokkos/pull/4096)
- Improve Python Interop Capabilities [\#4065](https://github.com/kokkos/kokkos/pull/4065)
- Add half_t Kokkos::rand specialization [\#3922](https://github.com/kokkos/kokkos/pull/3922)
- Add math special functions: erf, erfcx, expint1, Bessel functions, Hankel functions [\#3920](https://github.com/kokkos/kokkos/pull/3920)
- Add missing common mathematical functions [\#4043](https://github.com/kokkos/kokkos/pull/4043) [\#4036](https://github.com/kokkos/kokkos/pull/4036) [\#4034](https://github.com/kokkos/kokkos/pull/4034)
- Let the numeric traits be SFINAE-friendly [\#4038](https://github.com/kokkos/kokkos/pull/4038)
- Add Desul atomics - enabling memory-order and memory-scope parameters [\#3247](https://github.com/kokkos/kokkos/pull/3247)
- Add detection idiom from the C++ standard library extension version 2 [\#3980](https://github.com/kokkos/kokkos/pull/3980)
- Fence Profiling Support in all backends [\#3966](https://github.com/kokkos/kokkos/pull/3966) [\#4304](https://github.com/kokkos/kokkos/pull/4304) [\#4258](https://github.com/kokkos/kokkos/pull/4258) [\#4232](https://github.com/kokkos/kokkos/pull/4232)
- Significant SYCL enhancements (see below)
### Deprecations:
- Deprecate CUDA_SAFE_CALL and HIP_SAFE_CALL [\#4249](https://github.com/kokkos/kokkos/pull/4249)
- Deprecate Kokkos::Impl::Timer (Kokkos::Timer has been available for a long time) [\#4201](https://github.com/kokkos/kokkos/pull/4201)
- Deprecate Experimental::MasterLock [\#4094](https://github.com/kokkos/kokkos/pull/4094)
- Deprecate Kokkos_TaskPolicy.hpp (headers got reorganized, doesn't remove functionality) [\#4011](https://github.com/kokkos/kokkos/pull/4011)
- Deprecate backward compatibility features [\#3978](https://github.com/kokkos/kokkos/pull/3978)
- Update and deprecate is_space::host_memory/execution/mirror_space [\#3973](https://github.com/kokkos/kokkos/pull/3973)
### Backends and Archs Enhancements:
- Enabling constbitset constructors in kernels [\#4296](https://github.com/kokkos/kokkos/pull/4296)
- Use ZeroMemset in View constructor to improve performance [\#4226](https://github.com/kokkos/kokkos/pull/4226)
- Use memset in deep_copy [\#3944](https://github.com/kokkos/kokkos/pull/3944)
- Add missing fence() calls in resize(View) that effectively do deep_copy(resized, orig) [\#4212](https://github.com/kokkos/kokkos/pull/4212)
- Avoid allocations in resize and realloc [\#4207](https://github.com/kokkos/kokkos/pull/4207)
- StaticCsrGraph: use device type instead of execution space to construct views [\#3991](https://github.com/kokkos/kokkos/pull/3991)
- Consider std::sort when view is accessible from host [\#3929](https://github.com/kokkos/kokkos/pull/3929)
- Fix CPP20 warnings except for volatile [\#4312](https://github.com/kokkos/kokkos/pull/4312)
#### SYCL:
- Introduce SYCLHostUSMSpace [\#4268](https://github.com/kokkos/kokkos/pull/4268)
- Implement SYCL TeamPolicy for vector_size > 1 [\#4183](https://github.com/kokkos/kokkos/pull/4183)
- Enable 64bit ranges for SYCL [\#4211](https://github.com/kokkos/kokkos/pull/4211)
- Don't print SYCL device info in execution space initialization [\#4168](https://github.com/kokkos/kokkos/pull/4168)
- Improve SYCL MDRangePolicy performance [\#4161](https://github.com/kokkos/kokkos/pull/4161)
- Use sub_groups in SYCL parallel_scan [\#4147](https://github.com/kokkos/kokkos/pull/4147)
- Implement subgroup reduction for SYCL RangePolicy parallel_reduce [\#3940](https://github.com/kokkos/kokkos/pull/3940)
- Use DPC++ broadcast extension in SYCL team_broadcast [\#4103](https://github.com/kokkos/kokkos/pull/4103)
- Only fence in SYCL parallel_reduce for non-device-accessible result_ptr [\#4089](https://github.com/kokkos/kokkos/pull/4089)
- Improve fencing behavior in SYCL backend [\#4088](https://github.com/kokkos/kokkos/pull/4088)
- Fence all registered SYCL queues before deallocating memory [\#4086](https://github.com/kokkos/kokkos/pull/4086)
- Implement SYCL::print_configuration [\#3992](https://github.com/kokkos/kokkos/pull/3992)
- Reuse scratch memory in parallel_scan and TeamPolicy (decreases memory footprint) [\#3899](https://github.com/kokkos/kokkos/pull/3899) [\#3889](https://github.com/kokkos/kokkos/pull/3889)
#### CUDA:
- Cuda improve heuristic for blocksize [\#4271](https://github.com/kokkos/kokkos/pull/4271)
- Don't use [[deprecated]] for nvcc [\#4229](https://github.com/kokkos/kokkos/pull/4229)
- Improve error message for NVHPC as host compiler [\#4227](https://github.com/kokkos/kokkos/pull/4227)
- Update support for cuda reductions to work with types < 4 bytes [\#4156](https://github.com/kokkos/kokkos/pull/4156)
- Fix incompatible team size deduction in rare cases parallel_reduce [\#4142](https://github.com/kokkos/kokkos/pull/4142)
- Remove UVM usage in DynamicView [\#4129](https://github.com/kokkos/kokkos/pull/4129)
- Remove dependency between core and containers [\#4114](https://github.com/kokkos/kokkos/pull/4114)
- Adding opt-in CudaMallocSync support when using CUDA version >= 11.2 [\#4026](https://github.com/kokkos/kokkos/pull/4026) [\#4233](https://github.com/kokkos/kokkos/pull/4233)
- Fix a potential race condition in the CUDA backend [\#3999](https://github.com/kokkos/kokkos/pull/3999)
#### HIP:
- Implement new blocksize deduction method for HIP Backend [\#3953](https://github.com/kokkos/kokkos/pull/3953)
- Add multiple LaunchMechanism [\#3820](https://github.com/kokkos/kokkos/pull/3820)
- Make HIP backend thread-safe [\#4170](https://github.com/kokkos/kokkos/pull/4170)
#### Serial:
- Refactor Serial backend and fix thread-safety issue [\#4053](https://github.com/kokkos/kokkos/pull/4053)
#### OpenMPTarget:
- OpenMPTarget: support array reductions in RangePolicy [\#4040](https://github.com/kokkos/kokkos/pull/4040)
- OpenMPTarget: add MDRange parallel_reduce [\#4032](https://github.com/kokkos/kokkos/pull/4032)
- OpenMPTarget: Fix bug in the case of a reducer. [\#4044](https://github.com/kokkos/kokkos/pull/4044)
- OpenMPTarget: verify process fix [\#4041](https://github.com/kokkos/kokkos/pull/4041)
### Implemented enhancements BuildSystem
#### Important BuildSystem Updates:
- Use hipcc architecture autodetection when Kokkos_ARCH is not set [\#3941](https://github.com/kokkos/kokkos/pull/3941)
- Introduce Kokkos_ENABLE_DEPRECATION_WARNINGS and remove deprecated code with Kokkos_ENABLE_DEPRECATED_CODE_3 [\#4106](https://github.com/kokkos/kokkos/pull/4106) [\#3855](https://github.com/kokkos/kokkos/pull/3855)
#### Other Improvements:
- Add allow-unsupported-compiler flag to nvcc-wrapper [\#4298](https://github.com/kokkos/kokkos/pull/4298)
- nvcc_wrapper: fix errors in argument handling [\#3993](https://github.com/kokkos/kokkos/pull/3993)
- Adds support for -time=<file> and -time <file> in nvcc_wrapper [\#4015](https://github.com/kokkos/kokkos/pull/4015)
- nvcc_wrapper: suppress duplicates of GPU architecture and RDC flags [\#3968](https://github.com/kokkos/kokkos/pull/3968)
- Fix TMPDIR support in nvcc_wrapper [\#3792](https://github.com/kokkos/kokkos/pull/3792)
- NVHPC: update PGI compiler arch flags [\#4133](https://github.com/kokkos/kokkos/pull/4133)
- Replace PGI with NVHPC (works for both) [\#4196](https://github.com/kokkos/kokkos/pull/4196)
- Make sure that KOKKOS_CXX_HOST_COMPILER_ID is defined [\#4235](https://github.com/kokkos/kokkos/pull/4235)
- Add options to Makefile builds for deprecated code and warnings [\#4215](https://github.com/kokkos/kokkos/pull/4215)
- Use KOKKOS_CXX_HOST_COMPILER_ID for identifying CPU arch flags [\#4199](https://github.com/kokkos/kokkos/pull/4199)
- Added support for Cray Clang to Makefile.kokkos [\#4176](https://github.com/kokkos/kokkos/pull/4176)
- Add XLClang as compiler [\#4120](https://github.com/kokkos/kokkos/pull/4120)
- Keep quoted compiler flags when passing to Trilinos [\#3987](https://github.com/kokkos/kokkos/pull/3987)
- Add support for AMD Zen3 CPU architecture [\#3972](https://github.com/kokkos/kokkos/pull/3972)
- Rename IntelClang to IntelLLVM [\#3945](https://github.com/kokkos/kokkos/pull/3945)
- Add cppcoreguidelines-pro-type-cstyle-cast to clang-tidy [\#3522](https://github.com/kokkos/kokkos/pull/3522)
- Add sve bit size definition for A64FX [\#3947](https://github.com/kokkos/kokkos/pull/3947) [\#3946](https://github.com/kokkos/kokkos/pull/3946)
- Remove KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES [\#4150](https://github.com/kokkos/kokkos/pull/4150)
### Other Changes:
#### Tool Enhancements:
- Retrieve original value from a point in a MultidimensionalSparseTuningProblem [\#3977](https://github.com/kokkos/kokkos/pull/3977)
- Allow extension of built-in tuners with additional tuning axes [\#3961](https://github.com/kokkos/kokkos/pull/3961)
- Added a categorical tuner [\#3955](https://github.com/kokkos/kokkos/pull/3955)
#### Miscellaneous:
- hpcbind: Use double quotes around $@ when invoking user command [\#4284](https://github.com/kokkos/kokkos/pull/4284)
- Add file and line to error message [\#3985](https://github.com/kokkos/kokkos/pull/3985)
- Fix compiler warnings when compiling with nvc++ [\#4198](https://github.com/kokkos/kokkos/pull/4198)
- Add OpenMPTarget CI build on AMD GPUs [\#4055](https://github.com/kokkos/kokkos/pull/4055)
- CI: icpx is now part of intel container [\#4002](https://github.com/kokkos/kokkos/pull/4002)
### Incompatibilities:
- Remove pre CUDA 9 KOKKOS_IMPL_CUDA_* macros [\#4138](https://github.com/kokkos/kokkos/pull/4138)
### Bug Fixes:
- UnorderedMap::clear() should zero the size() [\#4130](https://github.com/kokkos/kokkos/pull/4130)
- Add memory fence for HostSharedPtr::cleanup() [\#4144](https://github.com/kokkos/kokkos/pull/4144)
- SYCL: Fix race conditions in TeamPolicy::parallel_reduce [\#4418](https://github.com/kokkos/kokkos/pull/4418)
- Adding missing memory fence to serial exec space fence. [\#4292](https://github.com/kokkos/kokkos/pull/4292)
- Fix using external SYCL queues in tests [\#4291](https://github.com/kokkos/kokkos/pull/4291)
- Fix digits10 bug [\#4281](https://github.com/kokkos/kokkos/pull/4281)
- Fix constexpr errors with -frounding-math on gcc < 10. [\#4278](https://github.com/kokkos/kokkos/pull/4278)
- Fix compiler flags for PGI/NVHPC [\#4264](https://github.com/kokkos/kokkos/pull/4264)
- Fix Zen2/3 also implying Zen Arch with Makefiles [\#4260](https://github.com/kokkos/kokkos/pull/4260)
- Kokkos_Cuda.hpp: Fix shadow warning with cuda/11.0 [\#4252](https://github.com/kokkos/kokkos/pull/4252)
- Fix issue w/ static initialization of function attributes [\#4242](https://github.com/kokkos/kokkos/pull/4242)
- Disable long double hypot test on Power systems [\#4221](https://github.com/kokkos/kokkos/pull/4221)
- Fix false sharing in random pool [\#4218](https://github.com/kokkos/kokkos/pull/4218)
- Fix a missing memory_fence for debug shared alloc code [\#4216](https://github.com/kokkos/kokkos/pull/4216)
- Fix two xl issues [\#4179](https://github.com/kokkos/kokkos/pull/4179)
- Makefile.kokkos: fix (standard_in) 1: syntax error [\#4173](https://github.com/kokkos/kokkos/pull/4173)
- Fixes for query_device example [\#4172](https://github.com/kokkos/kokkos/pull/4172)
- Fix a bug when using HIP atomic with Kokkos::Complex [\#4159](https://github.com/kokkos/kokkos/pull/4159)
- Fix mistaken logic in pthread creation [\#4157](https://github.com/kokkos/kokkos/pull/4157)
- Define KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION when requesting Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION=ON [\#4107](https://github.com/kokkos/kokkos/pull/4107)
- Fix compilation with latest MSVC version [\#4102](https://github.com/kokkos/kokkos/pull/4102)
- Fix incorrect macro definitions when compiling with Intel compiler on Windows [\#4087](https://github.com/kokkos/kokkos/pull/4087)
- Fixup global buffer overflow in hand rolled string manipulation [\#4070](https://github.com/kokkos/kokkos/pull/4070)
- Fixup heap buffer overflow in cmd line args parsing unit tests [\#4069](https://github.com/kokkos/kokkos/pull/4069)
- Only add quotes in compiler flags for Trilinos if necessary [\#4067](https://github.com/kokkos/kokkos/pull/4067)
- Fixed invocation of tools init callbacks [\#4061](https://github.com/kokkos/kokkos/pull/4061)
- Work around SYCL JIT compiler issues with static variables [\#4013](https://github.com/kokkos/kokkos/pull/4013)
- Fix TestDetectionIdiom.cpp test inclusion for Trilinos/TriBITS [\#4010](https://github.com/kokkos/kokkos/pull/4010)
- Fixup allocation headers with OpenMPTarget backend [\#4003](https://github.com/kokkos/kokkos/pull/4003)
- Add missing specialization for OMPT to Kokkos Random [\#3967](https://github.com/kokkos/kokkos/pull/3967)
- Disable hypot long double test on power arches [\#3962](https://github.com/kokkos/kokkos/pull/3962)
- Use different EBO workaround for MSVC (rebased) [\#3924](https://github.com/kokkos/kokkos/pull/3924)
- Fix SYCL Kokkos::Profiling::(de)allocateData calls [\#3928](https://github.com/kokkos/kokkos/pull/3928)
## [3.4.01](https://github.com/kokkos/kokkos/tree/3.4.01) (2021-05-19)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.4.00...3.4.01)

View File

@ -111,8 +111,8 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 3)
set(Kokkos_VERSION_MINOR 4)
set(Kokkos_VERSION_PATCH 01)
set(Kokkos_VERSION_MINOR 5)
set(Kokkos_VERSION_PATCH 00)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
@ -210,7 +210,12 @@ IF (KOKKOS_HAS_TRILINOS)
# which needs another workaround.
SET(KOKKOS_COMPILE_OPTIONS_TMP)
FOREACH(OPTION ${KOKKOS_COMPILE_OPTIONS})
LIST(APPEND KOKKOS_COMPILE_OPTIONS_TMP \"${OPTION}\")
# Wrap an option in literal double quotes only when it contains a space.
# string(FIND) stores -1 in OPTION_HAS_WHITESPACE when no space is found,
# in which case the option is appended unchanged.
STRING(FIND "${OPTION}" " " OPTION_HAS_WHITESPACE)
IF(OPTION_HAS_WHITESPACE EQUAL -1)
LIST(APPEND KOKKOS_COMPILE_OPTIONS_TMP "${OPTION}")
ELSE()
LIST(APPEND KOKKOS_COMPILE_OPTIONS_TMP "\"${OPTION}\"")
ENDIF()
ENDFOREACH()
STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS_TMP}")
LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS})

View File

@ -11,20 +11,21 @@ CXXFLAGS += $(SHFLAGS)
endif
KOKKOS_VERSION_MAJOR = 3
KOKKOS_VERSION_MINOR = 4
KOKKOS_VERSION_PATCH = 01
KOKKOS_VERSION_MINOR = 5
KOKKOS_VERSION_PATCH = 00
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,OpenMP,Pthread,Serial
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Pthread,Serial
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthread"
# Options:
# Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKX
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Vega900,Vega906,Vega908
# AMD-GPUS: Vega900,Vega906,Vega908,Vega90A
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP
KOKKOS_ARCH ?= ""
# Options: yes,no
KOKKOS_DEBUG ?= "no"
@ -32,8 +33,8 @@ KOKKOS_DEBUG ?= "no"
KOKKOS_USE_TPLS ?= ""
# Options: c++14,c++1y,c++17,c++1z,c++2a
KOKKOS_CXX_STANDARD ?= "c++14"
# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align
KOKKOS_OPTIONS ?= ""
# Options: aggressive_vectorization,disable_profiling,enable_large_mem_tests,disable_complex_align,disable_deprecated_code,enable_deprecation_warnings,enable_desul_atomics
KOKKOS_OPTIONS ?= "enable_desul_atomics"
KOKKOS_CMAKE ?= "no"
KOKKOS_TRIBITS ?= "no"
KOKKOS_STANDALONE_CMAKE ?= "no"
@ -80,7 +81,7 @@ KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),exper
# Check for advanced settings.
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization)
KOKKOS_INTERNAL_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization)
KOKKOS_INTERNAL_ENABLE_TUNING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_tuning)
KOKKOS_INTERNAL_DISABLE_COMPLEX_ALIGN := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_complex_align)
KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check)
@ -92,6 +93,9 @@ KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda)
KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_constexpr)
KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch)
KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_desul_atomics)
KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code)
KOKKOS_INTERNAL_ENABLE_DEPRECATION_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecation_warnings)
KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc)
@ -112,6 +116,7 @@ endif
# Check for other Execution Spaces.
KOKKOS_INTERNAL_USE_CUDA := $(call kokkos_has_string,$(KOKKOS_DEVICES),Cuda)
KOKKOS_INTERNAL_USE_HIP := $(call kokkos_has_string,$(KOKKOS_DEVICES),HIP)
KOKKOS_INTERNAL_USE_SYCL := $(call kokkos_has_string,$(KOKKOS_DEVICES),SYCL)
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenMPTarget)
KOKKOS_DEVICELIST =
@ -133,11 +138,18 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
KOKKOS_DEVICELIST += HIP
endif
# Sum of the C++17 / C++20 / C++2a standard switches, computed once up front
# so that every backend check that needs it can reuse the value.
KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER := $(shell expr $(KOKKOS_INTERNAL_ENABLE_CXX17) \
+ $(KOKKOS_INTERNAL_ENABLE_CXX20) \
+ $(KOKKOS_INTERNAL_ENABLE_CXX2A))
# Register the SYCL backend in the device list. Configuration aborts unless
# the sum above is exactly 1.
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
KOKKOS_DEVICELIST += SYCL
ifneq ($(KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER), 1)
$(error SYCL backend requires C++17 or newer)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
KOKKOS_DEVICELIST += OPENMPTARGET
KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER := $(shell expr $(KOKKOS_INTERNAL_ENABLE_CXX17) \
+ $(KOKKOS_INTERNAL_ENABLE_CXX20) \
+ $(KOKKOS_INTERNAL_ENABLE_CXX2A))
ifneq ($(KOKKOS_INTERNAL_HAVE_CXX17_OR_NEWER), 1)
$(error OpenMPTarget backend requires C++17 or newer)
endif
@ -168,6 +180,8 @@ KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2
KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep -c "CC-"))
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell echo "$(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep -c nvcc)>0" | bc))
KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang)
KOKKOS_INTERNAL_COMPILER_CRAY_CLANG := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep -c "clang++"))
KOKKOS_INTERNAL_COMPILER_INTEL_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),oneAPI)
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple clang)
KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC)
KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC)
@ -247,7 +261,11 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -mp
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
else
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
endif
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
@ -259,7 +277,11 @@ else
# OpenMP is turned on by default in Cray compiler environment.
KOKKOS_INTERNAL_OPENMP_FLAG :=
else
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -fiopenmp
else
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
endif
endif
endif
endif
@ -317,6 +339,13 @@ KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW)
KOKKOS_INTERNAL_USE_ARCH_SKX := $(call kokkos_has_string,$(KOKKOS_ARCH),SKX)
KOKKOS_INTERNAL_USE_ARCH_KNL := $(call kokkos_has_string,$(KOKKOS_ARCH),KNL)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen11)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP)
KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1)
KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP)
# NVIDIA based.
NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper
KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(call kokkos_has_string,$(KOKKOS_ARCH),Kepler30)
@ -384,20 +413,25 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3)
KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 0)
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
endif
endif
KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900)
KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906)
KOKKOS_INTERNAL_USE_ARCH_VEGA908 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega908)
KOKKOS_INTERNAL_USE_ARCH_VEGA90A := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega90A)
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2)) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Decide what ISA level we are able to support.
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2)) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9))
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7))
@ -406,7 +440,7 @@ KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POW
KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX))
# Incompatible flags?
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc )
# Incompatibility flags: 1 when more than one host (resp. NVIDIA GPU)
# architecture family was requested, 0 otherwise.
# The "| bc" must sit inside the shell call so the comparison is actually
# evaluated (same form as KOKKOS_INTERNAL_COMPILER_NVCC); with the pipe
# outside, the variable held the unevaluated text "<sum>>1 | bc" and the
# ifeq checks against 1 could never match.
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc))
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
@ -442,6 +476,10 @@ KOKKOS_LINK_FLAGS =
KOKKOS_SRC =
KOKKOS_HEADERS =
# Link against libatomic for every compiler.
# NOTE(review): the GCC-only guard below is commented out, so -latomic is
# added unconditionally — confirm this is intended for non-GCC toolchains.
#ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
KOKKOS_LIBS += -latomic
#endif
# Generating the KokkosCore_config.h file.
KOKKOS_INTERNAL_CONFIG_TMP=KokkosCore_config.tmp
@ -478,6 +516,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_HIP')
endif
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_SYCL')
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
tmp := $(call kokkos_append_header,'$H''define KOKKOS_ENABLE_OPENMPTARGET')
ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1)
@ -533,6 +575,12 @@ endif
#only add the c++ standard flags if this is not CMake
tmp := $(call kokkos_append_header,"/* General Settings */")
# Deprecated code is on by default: KOKKOS_ENABLE_DEPRECATED_CODE_3 is written
# to the config header unless disable_deprecated_code is in KOKKOS_OPTIONS.
ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEPRECATED_CODE_3")
endif
# Deprecation warnings are opt-in via enable_deprecation_warnings in KOKKOS_OPTIONS.
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEPRECATION_WARNINGS), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_DEPRECATION_WARNINGS")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX14), 1)
ifneq ($(KOKKOS_STANDALONE_CMAKE), yes)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX14_FLAG)
@ -635,8 +683,10 @@ endif
tmp := $(call kokkos_append_header,"/* Optimization Settings */")
ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
ifeq ($(KOKKOS_INTERNAL_AGGRESSIVE_VECTORIZATION), 1)
# deprecated
tmp := $(call kokkos_append_header,"$H""define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION")
endif
tmp := $(call kokkos_append_header,"/* Cuda Settings */")
@ -1166,6 +1216,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx908
endif
# Vega90A: record the HIP arch (90A) and the arch macro in the config header,
# and compile for the gfx90a target.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_HIP 90A")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx90a
endif
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp)
@ -1184,6 +1239,52 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
endif
endif
# Figure out the architecture flag for SYCL.
# Each Intel GPU branch below writes KOKKOS_ARCH_INTEL_GPU plus one
# arch-specific macro to the config header and sets
# KOKKOS_INTERNAL_SYCL_ARCH_FLAG with ":=", so if several branches are taken
# the last assignment determines the flag.
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
# Let's start with adding architecture defines
# NOTE(review): "IntelGen" is a prefix of IntelGen9/IntelGen11/IntelGen12LP;
# if kokkos_has_string matches substrings, this generic branch also fires for
# those values and the more specific branch below overrides the flag — confirm.
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN")
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9-"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN9")
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN11")
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen11"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN12LP")
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen12lp"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_DG1")
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device dg1"
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_XEHP")
KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device xehp"
endif
# Pull in the SYCL backend sources and headers.
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.hpp)
# The SYCL switch and the selected arch flag are passed to both the compile
# and the link step.
KOKKOS_CXXFLAGS+=-fsycl -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda
KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_SYCL_ARCH_FLAG)
KOKKOS_LDFLAGS+=-fsycl
KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_SYCL_ARCH_FLAG)
endif
# Write KOKKOS_ENABLE_IMPL_DESUL_ATOMICS to the config header when
# enable_desul_atomics is present in KOKKOS_OPTIONS.
ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_DESUL_ATOMICS")
endif
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)
@ -1196,56 +1297,62 @@ endif
ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h)
# Functions for generating the per-backend config header files.
#
# kokkos_start_config_header(out)
#   Runs sed over $(KOKKOS_PATH)/cmake/KokkosCore_Config_HeaderSet.in, deleting
#   every @INCLUDE_NEXT_FILE@ marker, and writes the result to the file 'out'.
# kokkos_update_config_header(tag, in, out)
#   Replaces every @HEADER_GUARD_TAG@ marker in the file 'in' with 'tag' and
#   writes the result to the file 'out'.
# kokkos_append_config_header(line, file)
#   Appends 'line' (via echo) to the end of 'file'.
#
# All three are meant to be used through $(call ...); the work happens as a
# side effect of $(shell ...), whose output is redirected into a file, so the
# value returned to the caller carries no useful information.
kokkos_start_config_header = $(shell sed 's~@INCLUDE_NEXT_FILE@~~g' $(KOKKOS_PATH)/cmake/KokkosCore_Config_HeaderSet.in > $1)
kokkos_update_config_header = $(shell sed 's~@HEADER_GUARD_TAG@~$1~g' $2 > $3)
kokkos_append_config_header = $(shell echo $1 >> $2)
tmp := $(call kokkos_start_config_header, "KokkosCore_Config_FwdBackend.tmp")
tmp := $(call kokkos_start_config_header, "KokkosCore_Config_SetupBackend.tmp")
tmp := $(call kokkos_start_config_header, "KokkosCore_Config_DeclareBackend.tmp")
tmp := $(call kokkos_start_config_header, "KokkosCore_Config_PostInclude.tmp")
tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_POST_INCLUDE_HPP_, "KokkosCore_Config_PostInclude.tmp", "KokkosCore_Config_PostInclude.hpp")
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_CUDA.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_CUDA.hpp>","KokkosCore_Config_DeclareBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <setup/Kokkos_Setup_Cuda.hpp>","KokkosCore_Config_SetupBackend.hpp")
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
else
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_OPENMPTARGET.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_OPENMPTARGET.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HIP.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HIP.hpp>","KokkosCore_Config_DeclareBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <setup/Kokkos_Setup_HIP.hpp>","KokkosCore_Config_SetupBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_OPENMP.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_OPENMP.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_THREADS.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_THREADS.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HPX.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HPX.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_SERIAL.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_SERIAL.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HBWSpace.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HBWSpace.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
# Functions for generating the per-backend config header files.
#
# kokkos_start_config_header(out)
#   Runs sed over $(KOKKOS_PATH)/cmake/KokkosCore_Config_HeaderSet.in, deleting
#   every @INCLUDE_NEXT_FILE@ marker, and writes the result to the file 'out'.
# kokkos_update_config_header(tag, in, out)
#   Replaces every @HEADER_GUARD_TAG@ marker in the file 'in' with 'tag' and
#   writes the result to the file 'out'.
# kokkos_append_config_header(line, file)
#   Appends 'line' (via echo) to the end of 'file'.
#
# All three are meant to be used through $(call ...); the work happens as a
# side effect of $(shell ...), whose output is redirected into a file, so the
# value returned to the caller carries no useful information.
kokkos_start_config_header = $(shell sed 's~@INCLUDE_NEXT_FILE@~~g' $(KOKKOS_PATH)/cmake/KokkosCore_Config_HeaderSet.in > $1)
kokkos_update_config_header = $(shell sed 's~@HEADER_GUARD_TAG@~$1~g' $2 > $3)
kokkos_append_config_header = $(shell echo $1 >> $2)
tmp := $(call kokkos_start_config_header, "KokkosCore_Config_FwdBackend.tmp")
tmp := $(call kokkos_start_config_header, "KokkosCore_Config_SetupBackend.tmp")
tmp := $(call kokkos_start_config_header, "KokkosCore_Config_DeclareBackend.tmp")
tmp := $(call kokkos_start_config_header, "KokkosCore_Config_PostInclude.tmp")
tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_POST_INCLUDE_HPP_, "KokkosCore_Config_PostInclude.tmp", "KokkosCore_Config_PostInclude.hpp")
# CUDA backend: append its forward-declaration, declaration and setup includes
# to the corresponding generated KokkosCore_Config_*Backend.hpp files.
# NOTE(review): $H is defined outside this excerpt; presumably it expands to
# the hash character so each appended line reads as an include directive.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_CUDA.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_CUDA.hpp>","KokkosCore_Config_DeclareBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <setup/Kokkos_Setup_Cuda.hpp>","KokkosCore_Config_SetupBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_OPENMPTARGET.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_OPENMPTARGET.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_SYCL.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_SYCL.hpp>","KokkosCore_Config_DeclareBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <setup/Kokkos_Setup_SYCL.hpp>","KokkosCore_Config_SetupBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HIP.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HIP.hpp>","KokkosCore_Config_DeclareBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <setup/Kokkos_Setup_HIP.hpp>","KokkosCore_Config_SetupBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_OPENMP.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_OPENMP.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_THREADS.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_THREADS.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HPX.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HPX.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_SERIAL.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_SERIAL.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HBWSpace.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HBWSpace.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp)
@ -1257,6 +1364,9 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
# Build every source file of the CUDA backend.
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1)
# With Desul atomics enabled, the Desul CUDA lock-array source is built too.
KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
ifneq ($(CUDA_PATH),)
# Only when CUDA_PATH is non-empty: expose the toolkit headers through
# KOKKOS_CPPFLAGS, the variable the object rules pass to $(CXX).
KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include

View File

@ -48,6 +48,17 @@ Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp
endif
# SYCL backend objects, only defined when KOKKOS_INTERNAL_USE_SYCL is 1.
# One explicit rule per SYCL source file; each object depends on
# $(KOKKOS_CPP_DEPENDS) plus its own .cpp and is compiled with
# KOKKOS_CPPFLAGS, KOKKOS_CXXFLAGS and CXXFLAGS, in that order.
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1)
Kokkos_SYCL.o : $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL.cpp
Kokkos_SYCL_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL_Space.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL_Space.cpp
Kokkos_SYCL_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/SYCL/Kokkos_SYCL_Instance.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)

View File

@ -7,7 +7,7 @@ applications targeting all major HPC platforms. For that purpose it provides
abstractions for both parallel execution of code and data management.
Kokkos is designed to target complex node architectures with N-level memory
hierarchies and multiple types of execution resources. It currently can use
CUDA, HPX, OpenMP and Pthreads as backend programming models with several other
CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other
backends in development.
Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem,
@ -16,29 +16,19 @@ profiling and debugging tools (https://github.com/kokkos/kokkos-tools).
# Learning about Kokkos
A programming guide can be found on the Wiki, the API reference is under development.
The best way to start learning about Kokkos is going through the Kokkos Lectures.
They are available online at https://kokkos.link/the-lectures and contain a mix
of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem
capabilities.
A programming guide and API reference can be found on the Wiki
(https://github.com/kokkos/kokkos/wiki).
For questions find us on Slack: https://kokkosteam.slack.com or open a github issue.
For non-public questions send an email to
crtrott(at)sandia.gov
A separate repository with extensive tutorial material can be found under
https://github.com/kokkos/kokkos-tutorials.
Furthermore, the 'example/tutorial' directory provides step by step tutorial
examples which explain many of the features of Kokkos. They work with
simple Makefiles. To build with g++ and OpenMP simply type 'make'
in the 'example/tutorial' directory. This will build all examples in the
subfolders. To change the build options refer to the Programming Guide
in the compilation section.
To learn more about Kokkos consider watching one of our presentations:
* GTC 2015:
- http://on-demand.gputechconf.com/gtc/2015/video/S5166.html
- http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf
# Contributing to Kokkos
We are open and try to encourage contributions from external developers.
@ -53,57 +43,40 @@ For specifics see the LICENSE file contained in the repository or distribution.
# Requirements
### Primary tested compilers on X86 are:
* GCC 5.3.0
* GCC 5.4.0
* GCC 5.5.0
* GCC 6.1.0
* GCC 7.2.0
* GCC 7.3.0
* GCC 8.1.0
* Intel 17.0.1
* Intel 17.4.196
* Intel 18.2.128
* Clang 4.0.0
* Clang 6.0.0 for CUDA (CUDA Toolkit 9.0)
* Clang 7.0.0 for CUDA (CUDA Toolkit 9.1)
* Clang 8.0.0 for CUDA (CUDA Toolkit 9.2)
* PGI 18.7
* NVCC 9.1 for CUDA (with gcc 6.1.0)
* NVCC 9.2 for CUDA (with gcc 7.2.0)
* NVCC 10.0 for CUDA (with gcc 7.4.0)
* NVCC 10.1 for CUDA (with gcc 7.4.0)
* NVCC 11.0 for CUDA (with gcc 8.4.0)
### Minimum Compiler Versions
### Primary tested compilers on Power 8 are:
* GCC 6.4.0 (OpenMP,Serial)
* GCC 7.2.0 (OpenMP,Serial)
* IBM XL 16.1.0 (OpenMP, Serial)
* NVCC 9.2.88 for CUDA (with gcc 7.2.0 and XL 16.1.0)
Generally Kokkos should work with all compiler versions newer than the minimum.
However, as with any sufficiently complex code, we have to work around compiler
bugs with almost all compilers. So compiler versions we don't test may have issues
we are unaware of.
### Primary tested compilers on Intel KNL are:
* Intel 17.2.174 (with gcc 6.2.0 and 6.4.0)
* Intel 18.2.199 (with gcc 6.2.0 and 6.4.0)
* GCC: 5.3.0
* Clang: 4.0.0
* Intel: 17.0.1
* NVCC: 9.2.88
* NVC++: 21.5
* ROCM: 4.3
* MSVC: 19.29
* IBM XL: 16.1.1
* Fujitsu: 4.5.0
* ARM/Clang: 20.1
### Primary tested compilers on ARM (Cavium ThunderX2)
* GCC 7.2.0
* ARM/Clang 18.4.0
### Primary Tested Compilers
### Other compilers working:
* X86:
* Cygwin 2.1.0 64bit with gcc 4.9.3
* GCC 8.1.0 (not warning free)
### Known non-working combinations:
* Power8:
* Pthreads backend
* ARM
* Pthreads backend
* GCC: 5.3.0, 6.1.0, 7.3.0, 8.3, 9.2, 10.0
* NVCC: 9.2.88, 10.1, 11.0
* Clang: 8.0.0, 9.0.0, 10.0.0, 12.0.0
* Intel: 17.4, 18.1, 19.5
* MSVC: 19.29
* ARM/Clang: 20.1
* IBM XL: 16.1.1
* ROCM: 4.3.0
### Build system:
* CMake >= 3.10: required
* CMake >= 3.13: recommended
* CMake >= 3.16: required
* CMake >= 3.18: Fortran linkage. This does not affect most mixed Fortran/Kokkos builds. See [build issues](BUILD.md#KnownIssues).
* CMake >= 3.21.1 for NVC++
Primary tested compilers are passing in release mode
with warnings as errors. They also are tested with a comprehensive set of
@ -153,7 +126,6 @@ cmake $srcdir \
-DCMAKE_INSTALL_PREFIX=$path_to_install \
-DKokkos_ENABLE_OPENMP=On \
-DKokkos_ARCH_HSW=On \
-DKokkos_ENABLE_HWLOC=On \
-DKokkos_HWLOC_DIR=$path_to_hwloc
````
then simply type `make install`. The Kokkos CMake package will then be installed in `$path_to_install` to be used by downstream packages.
@ -212,23 +184,8 @@ where `...` is the unique spec identifying the particular Kokkos configuration a
Some more details can be found in the Kokkos spack [documentation](Spack.md) or the Spack [website](https://spack.readthedocs.io/en/latest).
## Raw Makefile
A bash script is provided to generate raw makefiles.
To install Kokkos as a library create a build directory and run the following
````bash
> $KOKKOS_PATH/generate_makefile.bash --prefix=$path_to_install
````
Once the Makefile is generated, run:
````bash
> make kokkoslib
> make install
````
To additionally run the unit tests:
````bash
> make build-test
> make test
````
Run `generate_makefile.bash --help` for more detailed options such as
changing the device type for which to build.
Raw Makefiles are only supported via inline builds. See below.
## Inline Builds vs. Installed Package
For individual projects, it may be preferable to build Kokkos inline rather than link to an installed package.
@ -268,6 +225,35 @@ more than a single GPU is used by a single process.
If you publish work which mentions Kokkos, please cite the following paper:
````BibTex
@ARTICLE{9485033,
author={Trott, Christian R. and Lebrun-Grandié, Damien and Arndt, Daniel and Ciesko, Jan and Dang, Vinh and Ellingwood, Nathan and Gayatri, Rahulkumar and Harvey, Evan and Hollman, Daisy S. and Ibanez, Dan and Liber, Nevin and Madsen, Jonathan and Miles, Jeff and Poliakoff, David and Powell, Amy and Rajamanickam, Sivasankaran and Simberg, Mikael and Sunderland, Dan and Turcksin, Bruno and Wilke, Jeremiah},
journal={IEEE Transactions on Parallel and Distributed Systems},
title={Kokkos 3: Programming Model Extensions for the Exascale Era},
year={2022},
volume={33},
number={4},
pages={805-817},
doi={10.1109/TPDS.2021.3097283}}
````
If you use more than one Kokkos EcoSystem package, please also cite:
````BibTex
@ARTICLE{9502936,
author={Trott, Christian and Berger-Vergiat, Luc and Poliakoff, David and Rajamanickam, Sivasankaran and Lebrun-Grandie, Damien and Madsen, Jonathan and Al Awar, Nader and Gligoric, Milos and Shipman, Galen and Womeldorff, Geoff},
journal={Computing in Science Engineering},
title={The Kokkos EcoSystem: Comprehensive Performance Portability for High Performance Computing},
year={2021},
volume={23},
number={5},
pages={10-18},
doi={10.1109/MCSE.2021.3098509}}
````
And if you feel generous: feel free to cite the original Kokkos paper which describes most of the basic Kokkos concepts:
````BibTeX
@article{CarterEdwards20143202,
title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ",

View File

@ -5,9 +5,7 @@ KOKKOS_SUBPACKAGE(Algorithms)
IF (NOT Kokkos_INSTALL_TESTING)
ADD_SUBDIRECTORY(src)
ENDIF()
IF(NOT (KOKKOS_ENABLE_OPENMPTARGET
AND (KOKKOS_CXX_COMPILER_ID STREQUAL PGI OR
KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)))
IF(NOT (KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC))
KOKKOS_ADD_TEST_DIRECTORIES(unit_tests)
ENDIF()

View File

@ -447,6 +447,25 @@ struct rand<Generator, unsigned long long> {
}
};
#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
// rand specialization for Kokkos::Experimental::half_t. It is compiled only
// when KOKKOS_HALF_T_IS_FLOAT is defined and evaluates to false. Every draw is
// obtained from the generator's frand() overloads and then converted to half;
// bounds given as half are converted to float before being handed to frand().
template <class Generator>
struct rand<Generator, Kokkos::Experimental::half_t> {
  using half = Kokkos::Experimental::half_t;

  // Value reported as the maximum: 1.0 converted to half.
  KOKKOS_INLINE_FUNCTION
  static half max() { return static_cast<half>(1.0); }

  // Draw using the generator's argument-less frand().
  KOKKOS_INLINE_FUNCTION
  static half draw(Generator& gen) {
    return static_cast<half>(gen.frand());
  }

  // Draw using frand(range), with the bound passed as float.
  KOKKOS_INLINE_FUNCTION
  static half draw(Generator& gen, const half& range) {
    const float upper = static_cast<float>(range);
    return static_cast<half>(gen.frand(upper));
  }

  // Draw using frand(start, end), with both bounds passed as float.
  KOKKOS_INLINE_FUNCTION
  static half draw(Generator& gen, const half& start, const half& end) {
    const float lower = static_cast<float>(start);
    const float upper = static_cast<float>(end);
    return static_cast<half>(gen.frand(lower, upper));
  }
};
#endif  // defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
template <class Generator>
struct rand<Generator, float> {
KOKKOS_INLINE_FUNCTION
@ -600,7 +619,7 @@ struct Random_XorShift1024_UseCArrayState<Kokkos::Experimental::OpenMPTarget>
template <class ExecutionSpace>
struct Random_UniqueIndex {
using locks_view_type = View<int*, ExecutionSpace>;
using locks_view_type = View<int**, ExecutionSpace>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type) {
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
@ -615,7 +634,7 @@ struct Random_UniqueIndex {
#ifdef KOKKOS_ENABLE_CUDA
template <>
struct Random_UniqueIndex<Kokkos::Cuda> {
using locks_view_type = View<int*, Kokkos::Cuda>;
using locks_view_type = View<int**, Kokkos::Cuda>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) {
#ifdef __CUDA_ARCH__
@ -625,7 +644,7 @@ struct Random_UniqueIndex<Kokkos::Cuda> {
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
locks_.extent(0);
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset;
@ -643,7 +662,7 @@ struct Random_UniqueIndex<Kokkos::Cuda> {
#ifdef KOKKOS_ENABLE_HIP
template <>
struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
using locks_view_type = View<int*, Kokkos::Experimental::HIP>;
using locks_view_type = View<int**, Kokkos::Experimental::HIP>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) {
#ifdef __HIP_DEVICE_COMPILE__
@ -653,7 +672,7 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
blockDim.x * blockDim.y * blockDim.z +
i_offset) %
locks_.extent(0);
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
i += blockDim.x * blockDim.y * blockDim.z;
if (i >= static_cast<int>(locks_.extent(0))) {
i = i_offset;
@ -671,15 +690,15 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
#ifdef KOKKOS_ENABLE_SYCL
template <>
struct Random_UniqueIndex<Kokkos::Experimental::SYCL> {
using locks_view_type = View<int*, Kokkos::Experimental::SYCL>;
using locks_view_type = View<int**, Kokkos::Experimental::SYCL>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) {
#ifdef KOKKOS_ARCH_INTEL_GEN
#ifdef KOKKOS_ARCH_INTEL_GPU
int i = Kokkos::Impl::clock_tic() % locks_.extent(0);
#else
int i = 0;
#endif
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
i = (i + 1) % static_cast<int>(locks_.extent(0));
}
return i;
@ -690,14 +709,14 @@ struct Random_UniqueIndex<Kokkos::Experimental::SYCL> {
#ifdef KOKKOS_ENABLE_OPENMPTARGET
template <>
struct Random_UniqueIndex<Kokkos::Experimental::OpenMPTarget> {
using locks_view_type = View<int*, Kokkos::Experimental::OpenMPTarget>;
using locks_view_type = View<int**, Kokkos::Experimental::OpenMPTarget>;
KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks) {
const int team_size = omp_get_num_threads();
int i = omp_get_team_num() * team_size + omp_get_thread_num();
const int lock_size = locks.extent_int(0);
while (Kokkos::atomic_compare_exchange(&locks(i), 0, 1)) {
while (Kokkos::atomic_compare_exchange(&locks(i, 0), 0, 1)) {
i = (i + 1) % lock_size;
}
return i;
@ -856,18 +875,22 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift64_Pool {
private:
using execution_space = typename DeviceType::execution_space;
using locks_type = View<int*, execution_space>;
using state_data_type = View<uint64_t*, DeviceType>;
using locks_type = View<int**, execution_space>;
using state_data_type = View<uint64_t**, DeviceType>;
locks_type locks_;
state_data_type state_;
int num_states_;
int padding_;
public:
using generator_type = Random_XorShift64<DeviceType>;
using device_type = DeviceType;
KOKKOS_INLINE_FUNCTION
Random_XorShift64_Pool() { num_states_ = 0; }
Random_XorShift64_Pool() {
num_states_ = 0;
padding_ = 0;
}
Random_XorShift64_Pool(uint64_t seed) {
num_states_ = 0;
@ -883,16 +906,22 @@ class Random_XorShift64_Pool {
locks_ = src.locks_;
state_ = src.state_;
num_states_ = src.num_states_;
padding_ = src.padding_;
return *this;
}
void init(uint64_t seed, int num_states) {
if (seed == 0) seed = uint64_t(1318319);
// I only want to pad on CPU like archs (less than 1000 threads). 64 is a
// magic number, or random number I just wanted something not too large and
// not too small. 64 sounded fine.
padding_ = num_states < 1000 ? 64 : 1;
num_states_ = num_states;
locks_ = locks_type("Kokkos::Random_XorShift64::locks", num_states_);
state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_);
locks_ =
locks_type("Kokkos::Random_XorShift64::locks", num_states, padding_);
state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_,
padding_);
typename state_data_type::HostMirror h_state = create_mirror_view(state_);
typename locks_type::HostMirror h_lock = create_mirror_view(locks_);
@ -902,15 +931,15 @@ class Random_XorShift64_Pool {
gen(seed, 0);
for (int i = 0; i < 17; i++) gen.rand();
for (int i = 0; i < num_states_; i++) {
int n1 = gen.rand();
int n2 = gen.rand();
int n3 = gen.rand();
int n4 = gen.rand();
h_state(i) = (((static_cast<uint64_t>(n1)) & 0xffff) << 00) |
(((static_cast<uint64_t>(n2)) & 0xffff) << 16) |
(((static_cast<uint64_t>(n3)) & 0xffff) << 32) |
(((static_cast<uint64_t>(n4)) & 0xffff) << 48);
h_lock(i) = 0;
int n1 = gen.rand();
int n2 = gen.rand();
int n3 = gen.rand();
int n4 = gen.rand();
h_state(i, 0) = (((static_cast<uint64_t>(n1)) & 0xffff) << 00) |
(((static_cast<uint64_t>(n2)) & 0xffff) << 16) |
(((static_cast<uint64_t>(n3)) & 0xffff) << 32) |
(((static_cast<uint64_t>(n4)) & 0xffff) << 48);
h_lock(i, 0) = 0;
}
deep_copy(state_, h_state);
deep_copy(locks_, h_lock);
@ -920,19 +949,19 @@ class Random_XorShift64_Pool {
Random_XorShift64<DeviceType> get_state() const {
const int i =
Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
return Random_XorShift64<DeviceType>(state_(i), i);
return Random_XorShift64<DeviceType>(state_(i, 0), i);
}
// NOTE: state_idx MUST be unique and less than num_states
KOKKOS_INLINE_FUNCTION
Random_XorShift64<DeviceType> get_state(const int state_idx) const {
return Random_XorShift64<DeviceType>(state_(state_idx), state_idx);
return Random_XorShift64<DeviceType>(state_(state_idx, 0), state_idx);
}
KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift64<DeviceType>& state) const {
state_(state.state_idx_) = state.state_;
locks_(state.state_idx_) = 0;
state_(state.state_idx_, 0) = state.state_;
locks_(state.state_idx_, 0) = 0;
}
};
@ -1092,14 +1121,15 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
class Random_XorShift1024_Pool {
private:
using execution_space = typename DeviceType::execution_space;
using locks_type = View<int*, execution_space>;
using int_view_type = View<int*, DeviceType>;
using locks_type = View<int**, execution_space>;
using int_view_type = View<int**, DeviceType>;
using state_data_type = View<uint64_t * [16], DeviceType>;
locks_type locks_;
state_data_type state_;
int_view_type p_;
int num_states_;
int padding_;
friend class Random_XorShift1024<DeviceType>;
public:
@ -1129,15 +1159,21 @@ class Random_XorShift1024_Pool {
state_ = src.state_;
p_ = src.p_;
num_states_ = src.num_states_;
padding_ = src.padding_;
return *this;
}
inline void init(uint64_t seed, int num_states) {
if (seed == 0) seed = uint64_t(1318319);
// I only want to pad on CPU like archs (less than 1000 threads). 64 is a
// magic number, or random number I just wanted something not too large and
// not too small. 64 sounded fine.
padding_ = num_states < 1000 ? 64 : 1;
num_states_ = num_states;
locks_ = locks_type("Kokkos::Random_XorShift1024::locks", num_states_);
locks_ =
locks_type("Kokkos::Random_XorShift1024::locks", num_states_, padding_);
state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_);
p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_);
p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_, padding_);
typename state_data_type::HostMirror h_state = create_mirror_view(state_);
typename locks_type::HostMirror h_lock = create_mirror_view(locks_);
@ -1158,8 +1194,8 @@ class Random_XorShift1024_Pool {
(((static_cast<uint64_t>(n3)) & 0xffff) << 32) |
(((static_cast<uint64_t>(n4)) & 0xffff) << 48);
}
h_p(i) = 0;
h_lock(i) = 0;
h_p(i, 0) = 0;
h_lock(i, 0) = 0;
}
deep_copy(state_, h_state);
deep_copy(locks_, h_lock);
@ -1169,20 +1205,20 @@ class Random_XorShift1024_Pool {
Random_XorShift1024<DeviceType> get_state() const {
const int i =
Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
return Random_XorShift1024<DeviceType>(state_, p_(i), i);
return Random_XorShift1024<DeviceType>(state_, p_(i, 0), i);
};
// NOTE: state_idx MUST be unique and less than num_states
KOKKOS_INLINE_FUNCTION
Random_XorShift1024<DeviceType> get_state(const int state_idx) const {
return Random_XorShift1024<DeviceType>(state_, p_(state_idx), state_idx);
return Random_XorShift1024<DeviceType>(state_, p_(state_idx, 0), state_idx);
}
KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift1024<DeviceType>& state) const {
for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i];
p_(state.state_idx_) = state.p_;
locks_(state.state_idx_) = 0;
p_(state.state_idx_, 0) = state.p_;
locks_(state.state_idx_, 0) = 0;
}
};

View File

@ -206,8 +206,10 @@ class BinSort {
//----------------------------------------
// Constructor: takes the keys, the binning_operator and optionally whether to
// sort within bins (default false)
BinSort(const_key_view_type keys_, int range_begin_, int range_end_,
BinSortOp bin_op_, bool sort_within_bins_ = false)
template <typename ExecutionSpace>
BinSort(const ExecutionSpace& exec, const_key_view_type keys_,
int range_begin_, int range_end_, BinSortOp bin_op_,
bool sort_within_bins_ = false)
: keys(keys_),
keys_rnd(keys_),
bin_op(bin_op_),
@ -222,50 +224,63 @@ class BinSort {
"Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins());
bin_count_const = bin_count_atomic;
bin_offsets =
offset_type(view_alloc(WithoutInitializing,
offset_type(view_alloc(exec, WithoutInitializing,
"Kokkos::SortImpl::BinSortFunctor::bin_offsets"),
bin_op.max_bins());
sort_order =
offset_type(view_alloc(WithoutInitializing,
offset_type(view_alloc(exec, WithoutInitializing,
"Kokkos::SortImpl::BinSortFunctor::sort_order"),
range_end - range_begin);
}
BinSort(const_key_view_type keys_, int range_begin_, int range_end_,
BinSortOp bin_op_, bool sort_within_bins_ = false)
: BinSort(execution_space{}, keys_, range_begin_, range_end_, bin_op_,
sort_within_bins_) {}
template <typename ExecutionSpace>
BinSort(const ExecutionSpace& exec, const_key_view_type keys_,
BinSortOp bin_op_, bool sort_within_bins_ = false)
: BinSort(exec, keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {}
BinSort(const_key_view_type keys_, BinSortOp bin_op_,
bool sort_within_bins_ = false)
: BinSort(keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {}
: BinSort(execution_space{}, keys_, bin_op_, sort_within_bins_) {}
//----------------------------------------
// Create the permutation vector, the bin_offset array and the bin_count
// array. Can be called again if keys changed
void create_permute_vector() {
template <class ExecutionSpace = execution_space>
void create_permute_vector(const ExecutionSpace& exec = execution_space{}) {
const size_t len = range_end - range_begin;
Kokkos::parallel_for(
"Kokkos::Sort::BinCount",
Kokkos::RangePolicy<execution_space, bin_count_tag>(0, len), *this);
Kokkos::RangePolicy<ExecutionSpace, bin_count_tag>(exec, 0, len),
*this);
Kokkos::parallel_scan("Kokkos::Sort::BinOffset",
Kokkos::RangePolicy<execution_space, bin_offset_tag>(
0, bin_op.max_bins()),
Kokkos::RangePolicy<ExecutionSpace, bin_offset_tag>(
exec, 0, bin_op.max_bins()),
*this);
Kokkos::deep_copy(bin_count_atomic, 0);
Kokkos::deep_copy(exec, bin_count_atomic, 0);
Kokkos::parallel_for(
"Kokkos::Sort::BinBinning",
Kokkos::RangePolicy<execution_space, bin_binning_tag>(0, len), *this);
Kokkos::RangePolicy<ExecutionSpace, bin_binning_tag>(exec, 0, len),
*this);
if (sort_within_bins)
Kokkos::parallel_for(
"Kokkos::Sort::BinSort",
Kokkos::RangePolicy<execution_space, bin_sort_bins_tag>(
0, bin_op.max_bins()),
Kokkos::RangePolicy<ExecutionSpace, bin_sort_bins_tag>(
exec, 0, bin_op.max_bins()),
*this);
}
// Sort a subset of a view with respect to the first dimension using the
// permutation array
template <class ValuesViewType>
void sort(ValuesViewType const& values, int values_range_begin,
int values_range_end) const {
template <class ExecutionSpace, class ValuesViewType>
void sort(const ExecutionSpace& exec, ValuesViewType const& values,
int values_range_begin, int values_range_end) const {
using scratch_view_type =
Kokkos::View<typename ValuesViewType::data_type,
typename ValuesViewType::array_layout,
@ -279,7 +294,7 @@ class BinSort {
}
scratch_view_type sorted_values(
view_alloc(WithoutInitializing,
view_alloc(exec, WithoutInitializing,
"Kokkos::SortImpl::BinSortFunctor::sorted_values"),
values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
values.rank_dynamic > 1 ? values.extent(1)
@ -308,7 +323,7 @@ class BinSort {
values_range_begin - range_begin);
parallel_for("Kokkos::Sort::CopyPermute",
Kokkos::RangePolicy<execution_space>(0, len), functor);
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, len), functor);
}
{
@ -316,10 +331,23 @@ class BinSort {
values, range_begin, sorted_values);
parallel_for("Kokkos::Sort::Copy",
Kokkos::RangePolicy<execution_space>(0, len), functor);
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, len), functor);
}
}
execution_space().fence();
// Sort a subset of a view with respect to the first dimension using the
// permutation array
template <class ValuesViewType>
void sort(ValuesViewType const& values, int values_range_begin,
int values_range_end) const {
execution_space exec;
sort(exec, values, values_range_begin, values_range_end);
exec.fence("Kokkos::Sort: fence after sorting");
}
template <class ExecutionSpace, class ValuesViewType>
void sort(ExecutionSpace const& exec, ValuesViewType const& values) const {
this->sort(exec, values, 0, /*values.extent(0)*/ range_end - range_begin);
}
template <class ValuesViewType>
@ -485,17 +513,19 @@ struct BinOp3D {
namespace Impl {
template <class ViewType>
bool try_std_sort(ViewType view) {
template <class ViewType, class ExecutionSpace>
bool try_std_sort(ViewType view, const ExecutionSpace& exec) {
bool possible = true;
size_t stride[8] = {view.stride_0(), view.stride_1(), view.stride_2(),
view.stride_3(), view.stride_4(), view.stride_5(),
view.stride_6(), view.stride_7()};
possible = possible &&
std::is_same<typename ViewType::memory_space, HostSpace>::value;
SpaceAccessibility<HostSpace,
typename ViewType::memory_space>::accessible;
possible = possible && (ViewType::Rank == 1);
possible = possible && (stride[0] == 1);
if (possible) {
exec.fence("Kokkos::sort: Fence before sorting on the host");
std::sort(view.data(), view.data() + view.extent(0));
}
return possible;
@ -518,10 +548,12 @@ struct min_max_functor {
} // namespace Impl
template <class ViewType>
void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
template <class ExecutionSpace, class ViewType>
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
const ExecutionSpace& exec, ViewType const& view,
bool const always_use_kokkos_sort = false) {
if (!always_use_kokkos_sort) {
if (Impl::try_std_sort(view)) return;
if (Impl::try_std_sort(view, exec)) return;
}
using CompType = BinOp1D<ViewType>;
@ -529,34 +561,50 @@ void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
parallel_reduce("Kokkos::Sort::FindExtent",
Kokkos::RangePolicy<typename ViewType::execution_space>(
0, view.extent(0)),
exec, 0, view.extent(0)),
Impl::min_max_functor<ViewType>(view), reducer);
if (result.min_val == result.max_val) return;
BinSort<ViewType, CompType> bin_sort(
view, CompType(view.extent(0) / 2, result.min_val, result.max_val), true);
bin_sort.create_permute_vector();
bin_sort.sort(view);
bin_sort.create_permute_vector(exec);
bin_sort.sort(exec, view);
}
template <class ViewType>
void sort(ViewType view, size_t const begin, size_t const end) {
void sort(ViewType const& view, bool const always_use_kokkos_sort = false) {
typename ViewType::execution_space exec;
sort(exec, view, always_use_kokkos_sort);
exec.fence("Kokkos::Sort: fence after sorting");
}
template <class ExecutionSpace, class ViewType>
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
const ExecutionSpace& exec, ViewType view, size_t const begin,
size_t const end) {
using range_policy = Kokkos::RangePolicy<typename ViewType::execution_space>;
using CompType = BinOp1D<ViewType>;
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
parallel_reduce("Kokkos::Sort::FindExtent", range_policy(begin, end),
parallel_reduce("Kokkos::Sort::FindExtent", range_policy(exec, begin, end),
Impl::min_max_functor<ViewType>(view), reducer);
if (result.min_val == result.max_val) return;
BinSort<ViewType, CompType> bin_sort(
view, begin, end,
exec, view, begin, end,
CompType((end - begin) / 2, result.min_val, result.max_val), true);
bin_sort.create_permute_vector();
bin_sort.sort(view, begin, end);
bin_sort.create_permute_vector(exec);
bin_sort.sort(exec, view, begin, end);
}
template <class ViewType>
void sort(ViewType view, size_t const begin, size_t const end) {
typename ViewType::execution_space exec;
sort(exec, view, begin, end);
exec.fence("Kokkos::Sort: fence after sorting");
}
} // namespace Kokkos

View File

@ -47,7 +47,7 @@
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <cmath>
@ -198,11 +198,50 @@ struct test_random_functor {
static_cast<uint64_t>(1.0 * HIST_DIM3D * tmp2 / theMax);
const uint64_t ind3_3d =
static_cast<uint64_t>(1.0 * HIST_DIM3D * tmp3 / theMax);
// Work around an Intel 17 compiler bug which sometimes adds random
// instruction alignment that makes the lock instruction
// illegal. It seems to affect mostly unsigned int atomics.
// Looking at the assembly, the compiler
// appears to insert cache line alignment for the instruction.
// It isn't restricted to specific archs: seen on SNB and SKX, but for
// different code. Another occurrence was with Desul atomics in
// a different unit test. This one here happens without Desul atomics.
// Inserting an assembly nop instruction changes the alignment and
// works around this.
//
// 17.0.4 for 64bit Random works with 1/1/1/2/1
// 17.0.4 for 1024bit Random works with 1/1/1/1/1
#ifdef KOKKOS_COMPILER_INTEL
#if (KOKKOS_COMPILER_INTEL < 1800)
asm volatile("nop\n");
#endif
#endif
atomic_fetch_add(&density_1d(ind1_1d), 1);
#ifdef KOKKOS_COMPILER_INTEL
#if (KOKKOS_COMPILER_INTEL < 1800)
asm volatile("nop\n");
#endif
#endif
atomic_fetch_add(&density_1d(ind2_1d), 1);
#ifdef KOKKOS_COMPILER_INTEL
#if (KOKKOS_COMPILER_INTEL < 1800)
asm volatile("nop\n");
#endif
#endif
atomic_fetch_add(&density_1d(ind3_1d), 1);
#ifdef KOKKOS_COMPILER_INTEL
#if (KOKKOS_COMPILER_INTEL < 1800)
if (std::is_same<rnd_type, Kokkos::Random_XorShift64<device_type>>::value)
asm volatile("nop\n");
asm volatile("nop\n");
#endif
#endif
atomic_fetch_add(&density_3d(ind1_3d, ind2_3d, ind3_3d), 1);
#ifdef KOKKOS_COMPILER_INTEL
#if (KOKKOS_COMPILER_INTEL < 1800)
asm volatile("nop\n");
#endif
#endif
}
rand_pool.free_state(rand_gen);
}
@ -338,9 +377,11 @@ struct test_random_scalar {
using functor_type =
test_histogram1d_functor<typename RandomGenerator::device_type>;
parallel_reduce(HIST_DIM1D, functor_type(density_1d, num_draws), result);
double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D);
double mean_expect = 1.0 * num_draws * 3 / HIST_DIM1D;
double mean_eps_expect = 0.0001;
double variance_eps_expect = 0.07;
double covariance_eps_expect = 0.06;
double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D);
double mean_expect = 1.0 * num_draws * 3 / HIST_DIM1D;
double variance_expect =
1.0 * num_draws * 3 / HIST_DIM1D * (1.0 - 1.0 / HIST_DIM1D);
double covariance_expect = -1.0 * num_draws * 3 / HIST_DIM1D / HIST_DIM1D;
@ -349,11 +390,26 @@ struct test_random_scalar {
variance_expect / (result.variance / HIST_DIM1D) - 1.0;
double covariance_eps =
(result.covariance / HIST_DIM1D - covariance_expect) / mean_expect;
pass_hist1d_mean = ((-0.0001 < mean_eps) && (0.0001 > mean_eps)) ? 1 : 0;
pass_hist1d_var =
((-0.07 < variance_eps) && (0.07 > variance_eps)) ? 1 : 0;
pass_hist1d_covar =
((-0.06 < covariance_eps) && (0.06 > covariance_eps)) ? 1 : 0;
#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
if (std::is_same<Scalar, Kokkos::Experimental::half_t>::value) {
mean_eps_expect = 0.0003;
variance_eps_expect = 1.0;
covariance_eps_expect = 5.0e4;
}
#endif
pass_hist1d_mean =
((-mean_eps_expect < mean_eps) && (mean_eps_expect > mean_eps)) ? 1
: 0;
pass_hist1d_var = ((-variance_eps_expect < variance_eps) &&
(variance_eps_expect > variance_eps))
? 1
: 0;
pass_hist1d_covar = ((-covariance_eps_expect < covariance_eps) &&
(covariance_eps_expect > covariance_eps))
? 1
: 0;
cout << "Density 1D: " << mean_eps << " " << variance_eps << " "
<< (result.covariance / HIST_DIM1D / HIST_DIM1D) << " || "
@ -371,8 +427,9 @@ struct test_random_scalar {
test_histogram3d_functor<typename RandomGenerator::device_type>;
parallel_reduce(HIST_DIM1D, functor_type(density_3d, num_draws), result);
double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D);
double mean_expect = 1.0 * num_draws / HIST_DIM1D;
double variance_factor = 1.2;
double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D);
double mean_expect = 1.0 * num_draws / HIST_DIM1D;
double variance_expect =
1.0 * num_draws / HIST_DIM1D * (1.0 - 1.0 / HIST_DIM1D);
double covariance_expect = -1.0 * num_draws / HIST_DIM1D / HIST_DIM1D;
@ -381,15 +438,23 @@ struct test_random_scalar {
variance_expect / (result.variance / HIST_DIM1D) - 1.0;
double covariance_eps =
(result.covariance / HIST_DIM1D - covariance_expect) / mean_expect;
#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
if (std::is_same<Scalar, Kokkos::Experimental::half_t>::value) {
variance_factor = 7;
}
#endif
pass_hist3d_mean =
((-tolerance < mean_eps) && (tolerance > mean_eps)) ? 1 : 0;
pass_hist3d_var = ((-1.2 * tolerance < variance_eps) &&
(1.2 * tolerance > variance_eps))
pass_hist3d_var = ((-variance_factor * tolerance < variance_eps) &&
(variance_factor * tolerance > variance_eps))
? 1
: 0;
pass_hist3d_covar =
((-tolerance < covariance_eps) && (tolerance > covariance_eps)) ? 1
: 0;
pass_hist3d_covar = ((-variance_factor * tolerance < covariance_eps) &&
(variance_factor * tolerance > covariance_eps))
? 1
: 0;
cout << "Density 3D: " << mean_eps << " " << variance_eps << " "
<< result.covariance / HIST_DIM1D / HIST_DIM1D << " || " << tolerance
@ -471,6 +536,21 @@ void test_random(unsigned int num_draws) {
deep_copy(density_1d, 0);
deep_copy(density_3d, 0);
cout << "Test Scalar=half" << endl;
test_random_scalar<RandomGenerator, Kokkos::Experimental::half_t> test_half(
density_1d, density_3d, pool, num_draws);
ASSERT_EQ(test_half.pass_mean, 1);
ASSERT_EQ(test_half.pass_var, 1);
ASSERT_EQ(test_half.pass_covar, 1);
ASSERT_EQ(test_half.pass_hist1d_mean, 1);
ASSERT_EQ(test_half.pass_hist1d_var, 1);
ASSERT_EQ(test_half.pass_hist1d_covar, 1);
ASSERT_EQ(test_half.pass_hist3d_mean, 1);
ASSERT_EQ(test_half.pass_hist3d_var, 1);
ASSERT_EQ(test_half.pass_hist3d_covar, 1);
deep_copy(density_1d, 0);
deep_copy(density_3d, 0);
cout << "Test Scalar=float" << endl;
test_random_scalar<RandomGenerator, float> test_float(density_1d, density_3d,
pool, num_draws);

View File

@ -135,8 +135,9 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
KeyViewType keys("Keys", n);
// Test sorting array with all numbers equal
Kokkos::deep_copy(keys, KeyType(1));
Kokkos::sort(keys, force_kokkos);
ExecutionSpace exec;
Kokkos::deep_copy(exec, keys, KeyType(1));
Kokkos::sort(exec, keys, force_kokkos);
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
Kokkos::fill_random(keys, g,
@ -147,13 +148,16 @@ void test_1D_sort_impl(unsigned int n, bool force_kokkos) {
double sum_after = 0.0;
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(n, sum<ExecutionSpace, KeyType>(keys), sum_before);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
sum<ExecutionSpace, KeyType>(keys), sum_before);
Kokkos::sort(keys, force_kokkos);
Kokkos::sort(exec, keys, force_kokkos);
Kokkos::parallel_reduce(n, sum<ExecutionSpace, KeyType>(keys), sum_after);
Kokkos::parallel_reduce(
n - 1, is_sorted_struct<ExecutionSpace, KeyType>(keys), sort_fails);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
sum<ExecutionSpace, KeyType>(keys), sum_after);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n - 1),
is_sorted_struct<ExecutionSpace, KeyType>(keys),
sort_fails);
double ratio = sum_before / sum_after;
double epsilon = 1e-10;
@ -177,8 +181,10 @@ void test_3D_sort_impl(unsigned int n) {
double sum_after = 0.0;
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(keys.extent(0), sum3D<ExecutionSpace, KeyType>(keys),
sum_before);
ExecutionSpace exec;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, keys.extent(0)),
sum3D<ExecutionSpace, KeyType>(keys), sum_before);
int bin_1d = 1;
while (bin_1d * bin_1d * bin_1d * 4 < (int)keys.extent(0)) bin_1d *= 2;
@ -189,15 +195,17 @@ void test_3D_sort_impl(unsigned int n) {
using BinOp = Kokkos::BinOp3D<KeyViewType>;
BinOp bin_op(bin_max, min, max);
Kokkos::BinSort<KeyViewType, BinOp> Sorter(keys, bin_op, false);
Sorter.create_permute_vector();
Sorter.template sort<KeyViewType>(keys);
Sorter.create_permute_vector(exec);
Sorter.sort(exec, keys);
Kokkos::parallel_reduce(keys.extent(0), sum3D<ExecutionSpace, KeyType>(keys),
sum_after);
Kokkos::parallel_reduce(keys.extent(0) - 1,
bin3d_is_sorted_struct<ExecutionSpace, KeyType>(
keys, bin_1d, min[0], max[0]),
sort_fails);
Kokkos::parallel_reduce(
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, keys.extent(0)),
sum3D<ExecutionSpace, KeyType>(keys), sum_after);
Kokkos::parallel_reduce(
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, keys.extent(0) - 1),
bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys, bin_1d, min[0],
max[0]),
sort_fails);
double ratio = sum_before / sum_after;
double epsilon = 1e-10;
@ -229,36 +237,36 @@ void test_dynamic_view_sort_impl(unsigned int n) {
KeyViewType keys_view("KeysTmp", n);
// Test sorting array with all numbers equal
Kokkos::deep_copy(keys_view, KeyType(1));
ExecutionSpace exec;
Kokkos::deep_copy(exec, keys_view, KeyType(1));
Kokkos::deep_copy(keys, keys_view);
Kokkos::sort(keys, 0 /* begin */, n /* end */);
Kokkos::sort(exec, keys, 0 /* begin */, n /* end */);
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
Kokkos::fill_random(keys_view, g,
Kokkos::Random_XorShift64_Pool<
ExecutionSpace>::generator_type::MAX_URAND);
ExecutionSpace().fence();
exec.fence();
Kokkos::deep_copy(keys, keys_view);
// ExecutionSpace().fence();
double sum_before = 0.0;
double sum_after = 0.0;
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(n, sum<ExecutionSpace, KeyType>(keys_view),
sum_before);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
sum<ExecutionSpace, KeyType>(keys_view), sum_before);
Kokkos::sort(keys, 0 /* begin */, n /* end */);
Kokkos::sort(exec, keys, 0 /* begin */, n /* end */);
ExecutionSpace().fence(); // Need this fence to prevent BusError with Cuda
exec.fence(); // Need this fence to prevent BusError with Cuda
Kokkos::deep_copy(keys_view, keys);
// ExecutionSpace().fence();
Kokkos::parallel_reduce(n, sum<ExecutionSpace, KeyType>(keys_view),
sum_after);
Kokkos::parallel_reduce(
n - 1, is_sorted_struct<ExecutionSpace, KeyType>(keys_view), sort_fails);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
sum<ExecutionSpace, KeyType>(keys_view), sum_after);
Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n - 1),
is_sorted_struct<ExecutionSpace, KeyType>(keys_view),
sort_fails);
double ratio = sum_before / sum_after;
double epsilon = 1e-10;
@ -301,9 +309,10 @@ void test_issue_1160_impl() {
for (int i = 0; i < 10; ++i) {
h_v.access(i, 0) = h_x.access(i, 0) = double(h_element(i));
}
Kokkos::deep_copy(element_, h_element);
Kokkos::deep_copy(x_, h_x);
Kokkos::deep_copy(v_, h_v);
ExecutionSpace exec;
Kokkos::deep_copy(exec, element_, h_element);
Kokkos::deep_copy(exec, x_, h_x);
Kokkos::deep_copy(exec, v_, h_v);
using KeyViewType = decltype(element_);
using BinOp = Kokkos::BinOp1D<KeyViewType>;
@ -316,15 +325,16 @@ void test_issue_1160_impl() {
Kokkos::BinSort<KeyViewType, BinOp> Sorter(element_, begin, end, binner,
false);
Sorter.create_permute_vector();
Sorter.sort(element_, begin, end);
Sorter.create_permute_vector(exec);
Sorter.sort(exec, element_, begin, end);
Sorter.sort(x_, begin, end);
Sorter.sort(v_, begin, end);
Sorter.sort(exec, x_, begin, end);
Sorter.sort(exec, v_, begin, end);
Kokkos::deep_copy(h_element, element_);
Kokkos::deep_copy(h_x, x_);
Kokkos::deep_copy(h_v, v_);
Kokkos::deep_copy(exec, h_element, element_);
Kokkos::deep_copy(exec, h_x, x_);
Kokkos::deep_copy(exec, h_v, v_);
exec.fence();
ASSERT_EQ(h_element(0), 9);
ASSERT_EQ(h_element(1), 8);

View File

@ -3,4 +3,8 @@ image:
clone_folder: c:\projects\source
build_script:
- cmd: >-
cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc /d1reportClassLayoutChanges" -DCTEST_ARGS="-C Debug -V --output-on-failure" -DBUILD_NAME=MSVC-2019 -DBUILD_TYPE=Debug -DSITE=AppVeyor -DTARGET=install -P cmake/KokkosCI.cmake
mkdir build &&
cd build &&
cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DCMAKE_CXX_FLAGS="/W0 /EHsc" -DKokkos_ENABLE_DEPRECATED_CODE_3=ON -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF &&
cmake --build . --target install &&
ctest -C Debug --output-on-failure

View File

@ -1,12 +1,12 @@
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
#include <Kokkos_Random.hpp>
template <class Scalar>
double test_atomic(int L, int N, int M, int K, int R,
Kokkos::View<const int*> offsets) {
Kokkos::View<Scalar*> output("Output", N);
Kokkos::Impl::Timer timer;
Kokkos::Timer timer;
for (int r = 0; r < R; r++)
Kokkos::parallel_for(
@ -28,7 +28,7 @@ template <class Scalar>
double test_no_atomic(int L, int N, int M, int K, int R,
Kokkos::View<const int*> offsets) {
Kokkos::View<Scalar*> output("Output", N);
Kokkos::Impl::Timer timer;
Kokkos::Timer timer;
for (int r = 0; r < R; r++)
Kokkos::parallel_for(
L, KOKKOS_LAMBDA(const int& i) {

View File

@ -43,7 +43,7 @@
*/
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
template <class Scalar, int Unroll, int Stride>
struct Run {

View File

@ -43,7 +43,7 @@
*/
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
#include <bench.hpp>
#include <cstdlib>

View File

@ -43,7 +43,7 @@
*/
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
#include <gather.hpp>
#include <cstdlib>

View File

@ -52,35 +52,33 @@
#define HLINE "-------------------------------------------------------------\n"
#if defined(KOKKOS_ENABLE_CUDA)
using StreamHostArray = Kokkos::View<double*, Kokkos::CudaSpace>::HostMirror;
using StreamDeviceArray = Kokkos::View<double*, Kokkos::CudaSpace>;
#else
using StreamHostArray = Kokkos::View<double*, Kokkos::HostSpace>::HostMirror;
using StreamDeviceArray = Kokkos::View<double*, Kokkos::HostSpace>;
#endif
using StreamDeviceArray =
Kokkos::View<double*, Kokkos::MemoryTraits<Kokkos::Restrict>>;
using StreamHostArray = typename StreamDeviceArray::HostMirror;
using StreamIndex = int;
using Policy = Kokkos::RangePolicy<Kokkos::IndexType<StreamIndex>>;
double now() {
struct timeval now;
gettimeofday(&now, nullptr);
return (double)now.tv_sec + ((double)now.tv_usec * 1.0e-6);
}
void perform_copy(StreamDeviceArray& a, StreamDeviceArray& b,
StreamDeviceArray& c) {
void perform_set(StreamDeviceArray& a, const double scalar) {
Kokkos::parallel_for(
"copy", a.extent(0), KOKKOS_LAMBDA(const StreamIndex i) { c[i] = a[i]; });
"set", Policy(0, a.extent(0)),
KOKKOS_LAMBDA(const StreamIndex i) { a[i] = scalar; });
Kokkos::fence();
}
void perform_scale(StreamDeviceArray& a, StreamDeviceArray& b,
StreamDeviceArray& c, const double scalar) {
void perform_copy(StreamDeviceArray& a, StreamDeviceArray& b) {
Kokkos::parallel_for(
"copy", a.extent(0),
"copy", Policy(0, a.extent(0)),
KOKKOS_LAMBDA(const StreamIndex i) { b[i] = a[i]; });
Kokkos::fence();
}
void perform_scale(StreamDeviceArray& b, StreamDeviceArray& c,
const double scalar) {
Kokkos::parallel_for(
"scale", Policy(0, b.extent(0)),
KOKKOS_LAMBDA(const StreamIndex i) { b[i] = scalar * c[i]; });
Kokkos::fence();
@ -89,7 +87,7 @@ void perform_scale(StreamDeviceArray& a, StreamDeviceArray& b,
void perform_add(StreamDeviceArray& a, StreamDeviceArray& b,
StreamDeviceArray& c) {
Kokkos::parallel_for(
"add", a.extent(0),
"add", Policy(0, a.extent(0)),
KOKKOS_LAMBDA(const StreamIndex i) { c[i] = a[i] + b[i]; });
Kokkos::fence();
@ -98,7 +96,7 @@ void perform_add(StreamDeviceArray& a, StreamDeviceArray& b,
void perform_triad(StreamDeviceArray& a, StreamDeviceArray& b,
StreamDeviceArray& c, const double scalar) {
Kokkos::parallel_for(
"triad", a.extent(0),
"triad", Policy(0, a.extent(0)),
KOKKOS_LAMBDA(const StreamIndex i) { a[i] = b[i] + scalar * c[i]; });
Kokkos::fence();
@ -184,6 +182,7 @@ int run_benchmark() {
const double scalar = 3.0;
double setTime = std::numeric_limits<double>::max();
double copyTime = std::numeric_limits<double>::max();
double scaleTime = std::numeric_limits<double>::max();
double addTime = std::numeric_limits<double>::max();
@ -191,13 +190,10 @@ int run_benchmark() {
printf("Initializing Views...\n");
#if defined(KOKKOS_HAVE_OPENMP)
Kokkos::parallel_for(
"init", Kokkos::RangePolicy<Kokkos::OpenMP>(0, STREAM_ARRAY_SIZE),
#else
Kokkos::parallel_for(
"init", Kokkos::RangePolicy<Kokkos::Serial>(0, STREAM_ARRAY_SIZE),
#endif
"init",
Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0,
STREAM_ARRAY_SIZE),
KOKKOS_LAMBDA(const int i) {
a[i] = 1.0;
b[i] = 2.0;
@ -209,26 +205,30 @@ int run_benchmark() {
Kokkos::deep_copy(dev_b, b);
Kokkos::deep_copy(dev_c, c);
double start;
printf("Starting benchmarking...\n");
Kokkos::Timer timer;
for (StreamIndex k = 0; k < STREAM_NTIMES; ++k) {
start = now();
perform_copy(dev_a, dev_b, dev_c);
copyTime = std::min(copyTime, (now() - start));
timer.reset();
perform_set(dev_c, 1.5);
setTime = std::min(setTime, timer.seconds());
start = now();
perform_scale(dev_a, dev_b, dev_c, scalar);
scaleTime = std::min(scaleTime, (now() - start));
timer.reset();
perform_copy(dev_a, dev_c);
copyTime = std::min(copyTime, timer.seconds());
start = now();
timer.reset();
perform_scale(dev_b, dev_c, scalar);
scaleTime = std::min(scaleTime, timer.seconds());
timer.reset();
perform_add(dev_a, dev_b, dev_c);
addTime = std::min(addTime, (now() - start));
addTime = std::min(addTime, timer.seconds());
start = now();
timer.reset();
perform_triad(dev_a, dev_b, dev_c, scalar);
triadTime = std::min(triadTime, (now() - start));
triadTime = std::min(triadTime, timer.seconds());
}
Kokkos::deep_copy(a, dev_a);
@ -240,6 +240,9 @@ int run_benchmark() {
printf(HLINE);
printf("Set %11.2f MB/s\n",
(1.0e-06 * 1.0 * (double)sizeof(double) * (double)STREAM_ARRAY_SIZE) /
setTime);
printf("Copy %11.2f MB/s\n",
(1.0e-06 * 2.0 * (double)sizeof(double) * (double)STREAM_ARRAY_SIZE) /
copyTime);

View File

@ -634,15 +634,15 @@ elif [[ ${HPCBIND_HAS_COMMAND} -eq 1 ]]; then
> ${HPCBIND_OUT}
if [[ ${HPCBIND_TEE} -eq 0 ]]; then
if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then
hwloc-bind "${HPCBIND_HWLOC_CPUSET}" -- $@ > ${HPCBIND_OUT} 2> ${HPCBIND_ERR}
hwloc-bind "${HPCBIND_HWLOC_CPUSET}" -- "$@" > ${HPCBIND_OUT} 2> ${HPCBIND_ERR}
else
eval $@ > ${HPCBIND_OUT} 2> ${HPCBIND_ERR}
eval "$@" > ${HPCBIND_OUT} 2> ${HPCBIND_ERR}
fi
else
if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then
hwloc-bind "${HPCBIND_HWLOC_CPUSET}" -- $@ > >(tee ${HPCBIND_OUT}) 2> >(tee ${HPCBIND_ERR} >&2)
hwloc-bind "${HPCBIND_HWLOC_CPUSET}" -- "$@" > >(tee ${HPCBIND_OUT}) 2> >(tee ${HPCBIND_ERR} >&2)
else
eval $@ > >(tee ${HPCBIND_OUT}) 2> >(tee ${HPCBIND_ERR} >&2)
eval "$@" > >(tee ${HPCBIND_OUT}) 2> >(tee ${HPCBIND_ERR} >&2)
fi
fi
fi

View File

@ -96,10 +96,10 @@ replace_pragma_ident=0
first_xcompiler_arg=1
# Allow for setting temp dir without setting TMPDIR in parent (see https://docs.olcf.ornl.gov/systems/summit_user_guide.html#setting-tmpdir-causes-jsm-jsrun-errors-job-state-flip-flop)
if [[ ! -z ${NVCC_WRAPPER_TMPDIR+x} ]]; then
if [[ -z ${NVCC_WRAPPER_TMPDIR+x} ]]; then
temp_dir=${TMPDIR:-/tmp}
else
temp_dir=${NVCC_WRAPPER_TMPDIR+x}
temp_dir=${NVCC_WRAPPER_TMPDIR}
fi
# optimization flag added as a command-line argument
@ -226,14 +226,14 @@ do
cuda_args="$cuda_args $1"
;;
#Handle more known nvcc args
--expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets)
--expt-extended-lambda|--expt-relaxed-constexpr|--Wno-deprecated-gpu-targets|-Wno-deprecated-gpu-targets|-allow-unsupported-compiler|--allow-unsupported-compiler)
cuda_args="$cuda_args $1"
;;
#Handle known nvcc args that have an argument
-maxrregcount=*|--maxrregcount=*)
-maxrregcount=*|--maxrregcount=*|-time=*)
cuda_args="$cuda_args $1"
;;
-maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart|-include)
-maxrregcount|--default-stream|-Xnvlink|--fmad|-cudart|--cudart|-include|-time)
cuda_args="$cuda_args $1 $2"
shift
;;
@ -552,14 +552,14 @@ if [ $host_only -eq 1 ]; then
$host_command
elif [ -n "$nvcc_depfile_command" ]; then
if [ "$NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN" == "1" ] ; then
echo "$nvcc_command && $nvcc_depfile_command"
echo "TMPDIR=${temp_dir} $nvcc_command && TMPDIR=${temp_dir} $nvcc_depfile_command"
fi
$nvcc_command && $nvcc_depfile_command
TMPDIR=${temp_dir} $nvcc_command && TMPDIR=${temp_dir} $nvcc_depfile_command
else
if [ "$NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN" == "1" ] ; then
echo "$nvcc_command"
echo "TMPDIR=${temp_dir} $nvcc_command"
fi
$nvcc_command
TMPDIR=${temp_dir} $nvcc_command
fi
error_code=$?

View File

@ -1,91 +0,0 @@
#----------------------------------------------------------------------------------------#
#
# CTestConfig.cmake template for Kokkos
#
#----------------------------------------------------------------------------------------#
#
# dash-board related
#
set(CTEST_PROJECT_NAME "Kokkos")
set(CTEST_NIGHTLY_START_TIME "01:00:00 UTC")
set(CTEST_DROP_METHOD "https")
set(CTEST_DROP_SITE "cdash.nersc.gov")
set(CTEST_DROP_LOCATION "/submit.php?project=${CTEST_PROJECT_NAME}")
set(CTEST_CDASH_VERSION "1.6")
set(CTEST_CDASH_QUERY_VERSION TRUE)
set(CTEST_SUBMIT_RETRY_COUNT "1")
set(CTEST_SUBMIT_RETRY_DELAY "30")
#
# configure/build related
#
set(CTEST_BUILD_NAME "@BUILD_NAME@")
set(CTEST_MODEL "@MODEL@")
set(CTEST_SITE "@SITE@")
set(CTEST_CONFIGURATION_TYPE "@BUILD_TYPE@")
set(CTEST_SOURCE_DIRECTORY "@SOURCE_REALDIR@")
set(CTEST_BINARY_DIRECTORY "@BINARY_REALDIR@")
#
# update (version control) and generator related
#
set(CTEST_UPDATE_TYPE "git")
set(CTEST_UPDATE_VERSION_ONLY ON)
# set(CTEST_GENERATOR "")
# set(CTEST_GENERATOR_PLATFORM "")
#
# testing related
#
set(CTEST_TIMEOUT "7200")
set(CTEST_TEST_TIMEOUT "7200")
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "100")
set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "100")
set(CTEST_CUSTOM_MAXIMUM_PASSED_TEST_OUTPUT_SIZE "1048576")
#
# coverage related
#
set(CTEST_CUSTOM_COVERAGE_EXCLUDE ".*tpls/.*;/usr/.*;.*unit_test/.*;.*unit_tests/.*;.*perf_test/.*")
#
# commands
#
# Only define a checkout command when the template was given a non-empty one.
if(NOT "@CHECKOUT_COMMAND@" STREQUAL "")
set(CTEST_CHECKOUT_COMMAND "@CHECKOUT_COMMAND@")
endif()
set(CTEST_UPDATE_COMMAND "@GIT_EXECUTABLE@")
# Configure step: always enables the Kokkos tests and forwards the build type,
# the extra configuration arguments and the source directory from the template.
set(CTEST_CONFIGURE_COMMAND "@CMAKE_COMMAND@ -DCMAKE_BUILD_TYPE=@BUILD_TYPE@ -DKokkos_ENABLE_TESTS=ON @CONFIG_ARGS@ @SOURCE_REALDIR@")
# Build step: drives the build tree through `cmake --build` for the chosen target.
set(CTEST_BUILD_COMMAND "@CMAKE_COMMAND@ --build @BINARY_REALDIR@ --target @TARGET@")
# Everywhere except Windows, pass a -j<jobs> flag through to the native build
# tool (everything after `--` goes to that tool).
if(NOT WIN32)
set(CTEST_BUILD_COMMAND "${CTEST_BUILD_COMMAND} -- -j@BUILD_JOBS@")
endif()
# Tools used for the coverage and memory-check steps; named without a path,
# so presumably resolved via PATH -- TODO confirm.
set(CTEST_COVERAGE_COMMAND "gcov")
set(CTEST_MEMORYCHECK_COMMAND "valgrind")
set(CTEST_GIT_COMMAND "@GIT_EXECUTABLE@")
#
# various configs
#
set(APPEND_VALUE @APPEND@)
if(APPEND_VALUE)
set(APPEND_CTEST APPEND)
endif()
# SET_TEST_PROP(<VAR> [value...])
#
# When at least one value is supplied, defines <VAR>_CTEST in the caller's
# scope as the list "<VAR>;<value>...", i.e. a keyword/value pair.
# When no value is supplied (the template placeholder expanded to nothing),
# <VAR>_CTEST is left untouched.
#
# This must stay a macro: the result variable is set directly in the
# including scope.
macro(SET_TEST_PROP VAR)
    # The extra arguments of a macro are available as ${ARGN}. The previous
    # test of "${ARGS}" referenced an ordinary, undefined variable, so the
    # condition was always false and no <VAR>_CTEST was ever defined.
    if(NOT "${ARGN}" STREQUAL "")
        set(${VAR}_CTEST ${VAR} ${ARGN})
    endif()
endmacro()
set_test_prop(START @START@)
set_test_prop(END @END@)
set_test_prop(STRIDE @STRIDE@)
set_test_prop(INCLUDE @INCLUDE@)
set_test_prop(EXCLUDE @EXCLUDE@)
set_test_prop(INCLUDE_LABEL @INCLUDE_LABEL@)
set_test_prop(EXCLUDE_LABEL @EXCLUDE_LABEL@)
set_test_prop(PARALLEL_LEVEL @PARALLEL_LEVEL@)
set_test_prop(STOP_TIME @STOP_TIME@)
set_test_prop(COVERAGE_LABELS @LABELS@)

View File

@ -1,350 +0,0 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
message(STATUS "")
# Collect the names of the cache variables that are candidates for being
# forwarded to the Kokkos configuration. The result, KOKKOS_CMAKE_ARGS, is a
# sorted list of variable NAMES (not values).
get_cmake_property(_cached_vars CACHE_VARIABLES)
set(KOKKOS_CMAKE_ARGS)
# Names that are never forwarded through KOKKOS_CMAKE_ARGS.
set(EXCLUDED_VARIABLES "CMAKE_COMMAND" "CMAKE_CPACK_COMMAND" "CMAKE_CTEST_COMMAND" "CMAKE_ROOT"
    "CTEST_ARGS" "BUILD_NAME" "CMAKE_CXX_FLAGS" "CMAKE_BUILD_TYPE")
list(SORT _cached_vars)
foreach(_var ${_cached_vars})
    # A former nested check for CMAKE_BUILD_TYPE lived here; it could never
    # fire because that name is part of EXCLUDED_VARIABLES, so it was removed.
    # BUILD_TYPE still receives its default from CMAKE_BUILD_TYPE further down
    # via SET_DEFAULT(BUILD_TYPE ...).
    if(NOT "${_var}" IN_LIST EXCLUDED_VARIABLES)
        list(APPEND KOKKOS_CMAKE_ARGS ${_var})
    endif()
endforeach()
#----------------------------------------------------------------------------------------#
#
# Macros and variables
#
#----------------------------------------------------------------------------------------#
# check_required(<VAR>)
#
# Stop with a fatal error unless a variable named <VAR> is defined.
macro(CHECK_REQUIRED VAR)
    if(DEFINED ${VAR})
        # variable is present: nothing to do
    else()
        message(FATAL_ERROR "Error! Variable '${VAR}' must be defined")
    endif()
endmacro()
# require the build name variable
CHECK_REQUIRED(BUILD_NAME)
# set_default(<VAR> [<value>...])
#
# Give <VAR> a default consisting of ALL remaining arguments, but only when
# <VAR> is not defined yet. Independently of that, <VAR> is taken out of
# KOKKOS_CMAKE_ARGS: it is a ctest configuration variable and must not be
# passed on to the Kokkos configuration.
macro(SET_DEFAULT VAR)
    if("${VAR}" IN_LIST KOKKOS_CMAKE_ARGS)
        list(REMOVE_ITEM KOKKOS_CMAKE_ARGS "${VAR}")
    endif()
    if(NOT DEFINED ${VAR})
        set(${VAR} ${ARGN})
    endif()
endmacro()
# uses first arg -- useful for selecting via priority from multiple
# potentially defined variables, e.g.:
#
# set_default_arg1(BUILD_NAME ${TRAVIS_BUILD_NAME} ${BUILD_NAME})
#
# set_default_arg1(<VAR> [<candidate>...])
#
# Priority selection: when <VAR> is not defined yet, assign it the first
# non-empty candidate. Independently of that, <VAR> is taken out of
# KOKKOS_CMAKE_ARGS: it is a ctest configuration variable and must not be
# passed on to the Kokkos configuration.
macro(SET_DEFAULT_ARG1 VAR)
    if("${VAR}" IN_LIST KOKKOS_CMAKE_ARGS)
        list(REMOVE_ITEM KOKKOS_CMAKE_ARGS "${VAR}")
    endif()
    if(NOT DEFINED ${VAR})
        foreach(_ARG ${ARGN})
            if("${_ARG}" STREQUAL "")
                continue()
            endif()
            # first usable candidate wins
            set(${VAR} ${_ARG})
            break()
        endforeach()
    endif()
endmacro()
# determine the default working directory
if(NOT "$ENV{WORKSPACE}" STREQUAL "")
set(WORKING_DIR "$ENV{WORKSPACE}")
else()
get_filename_component(WORKING_DIR ${CMAKE_CURRENT_LIST_DIR} DIRECTORY)
endif()
# determine the hostname
execute_process(COMMAND hostname
OUTPUT_VARIABLE HOSTNAME
OUTPUT_STRIP_TRAILING_WHITESPACE)
SET_DEFAULT(HOSTNAME "$ENV{HOSTNAME}")
# get the number of processors
include(ProcessorCount)
ProcessorCount(NUM_PROCESSORS)
# find git
find_package(Git QUIET)
if(NOT GIT_EXECUTABLE)
unset(GIT_EXECUTABLE CACHE)
unset(GIT_EXECUTABLE)
endif()
# execute_git_command(<VAR> <arg>...)
#
# Run ${GIT_EXECUTABLE} with the given arguments, using the directory of this
# script as working directory and discarding stderr.
#
# Results, all set in the caller's scope:
#   <VAR>             stdout with trailing whitespace stripped when the
#                     command exits with 0, otherwise the empty string
#   LAST_GIT_COMMAND  the command line, with arguments joined by spaces
function(EXECUTE_GIT_COMMAND VAR)
    # remember what is being run so callers can report it
    string(REPLACE ";" " " _cmdline "${GIT_EXECUTABLE} ${ARGN}")
    set(LAST_GIT_COMMAND "${_cmdline}" PARENT_SCOPE)

    execute_process(
        COMMAND ${GIT_EXECUTABLE} ${ARGN}
        WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
        RESULT_VARIABLE _status
        OUTPUT_VARIABLE _stdout
        OUTPUT_STRIP_TRAILING_WHITESPACE
        ERROR_QUIET)

    if(_status EQUAL 0)
        set(${VAR} "${_stdout}" PARENT_SCOPE)
    else()
        set(${VAR} "" PARENT_SCOPE)
    endif()
endfunction()
# get_git_branch_name(<VAR>)
#
# Determine a name for the current git checkout and, if one is found, store it
# in <VAR> in the caller's scope and print a status message. Up to three
# queries are tried until one yields a usable value (anything other than a
# false/empty value, "%D" or "HEAD"):
#   1. git branch --show-current
#   2. git show -s --format=%D
#   3. git describe --all
# When the answer is made of several space-separated words only the last word
# is kept. <VAR> is not touched when no query produced a value.
function(GET_GIT_BRANCH_NAME VAR)
    execute_git_command(GIT_BRANCH branch --show-current)
    set(_INVALID "%D" "HEAD")
    if(NOT GIT_BRANCH OR "${GIT_BRANCH}" IN_LIST _INVALID)
        execute_git_command(GIT_BRANCH show -s --format=%D)
        if(NOT GIT_BRANCH OR "${GIT_BRANCH}" IN_LIST _INVALID)
            # 'describe' is the git sub-command and '--all' its option; the
            # former spelling '--describe all' is not a valid git invocation,
            # so this last fallback could never succeed.
            execute_git_command(GIT_BRANCH describe --all)
        endif()
    endif()
    #
    if(GIT_BRANCH)
        string(REPLACE " " ";" _DESC "${GIT_BRANCH}")
        # just set it to last one via loop instead of wonky cmake index manip
        foreach(_ITR ${_DESC})
            set(GIT_BRANCH "${_ITR}")
        endforeach()
        set(${VAR} "${GIT_BRANCH}" PARENT_SCOPE)
        message(STATUS "GIT BRANCH via '${LAST_GIT_COMMAND}': ${GIT_BRANCH}")
    endif()
endfunction()
# get_git_author_name(<VAR>)
#
# Query the author name via 'git show -s --format=%an' and, if one is
# returned, store a compact form of it in <VAR> in the caller's scope and
# print a status message. <VAR> is not touched otherwise.
#
# Names longer than 24 characters are shortened first: a middle initial such
# as " Q. " is dropped and every "word word" pair is reduced to its second
# word (the surname for a plain "First Last" name). Finally spaces, quotes,
# commas, semicolons, underscores and periods are removed.
function(GET_GIT_AUTHOR_NAME VAR)
    execute_git_command(GIT_AUTHOR show -s --format=%an)
    if(GIT_AUTHOR)
        string(LENGTH "${GIT_AUTHOR}" STRLEN)
        # if the build name gets too long, this can cause submission errors
        if(STRLEN GREATER 24)
            # remove middle initial
            # "\\." reaches the regex engine as "\.", i.e. a literal period.
            # With a single backslash CMake's own string escaping consumes it
            # and the regex sees ".", which matches ANY character.
            string(REGEX REPLACE " [A-Z]\\. " " " GIT_AUTHOR "${GIT_AUTHOR}")
            # get first and sur name
            string(REGEX REPLACE "([A-Za-z]+) ([A-Za-z]+)" "\\1" F_NAME "${GIT_AUTHOR}")
            string(REGEX REPLACE "([A-Za-z]+) ([A-Za-z]+)" "\\2" S_NAME "${GIT_AUTHOR}")
            if(S_NAME)
                set(GIT_AUTHOR "${S_NAME}")
            elseif(F_NAME)
                set(GIT_AUTHOR "${F_NAME}")
            endif()
        endif()
        # remove any spaces, quotes, periods, etc.
        string(REGEX REPLACE "[ ',;_\.\"]+" "" GIT_AUTHOR "${GIT_AUTHOR}")
        set(${VAR} "${GIT_AUTHOR}" PARENT_SCOPE)
        message(STATUS "GIT AUTHOR via '${LAST_GIT_COMMAND}': ${GIT_AUTHOR}")
    endif()
endfunction()
# get the name of the branch
GET_GIT_BRANCH_NAME(GIT_BRANCH)
# get the name of the author
GET_GIT_AUTHOR_NAME(GIT_AUTHOR)
# author, prefer git method for consistency
SET_DEFAULT_ARG1(AUTHOR ${GIT_AUTHOR} $ENV{GIT_AUTHOR} $ENV{AUTHOR})
# SLUG == owner_name/repo_name
SET_DEFAULT_ARG1(SLUG $ENV{TRAVIS_PULL_REQUEST_SLUG} $ENV{TRAVIS_REPO_SLUG} $ENV{APPVEYOR_REPO_NAME} $ENV{PULL_REQUEST_SLUG} $ENV{REPO_SLUG})
# branch name
SET_DEFAULT_ARG1(BRANCH $ENV{TRAVIS_PULL_REQUEST_BRANCH} $ENV{TRAVIS_BRANCH} $ENV{APPVEYOR_PULL_REQUEST_HEAD_REPO_BRANCH} $ENV{APPVEYOR_REPO_BRANCH} $ENV{GIT_BRANCH} $ENV{BRANCH_NAME} $ENV{BRANCH} ${GIT_BRANCH})
# pull request number
SET_DEFAULT_ARG1(PULL_REQUEST_NUM $ENV{TRAVIS_PULL_REQUEST} $ENV{CHANGE_ID} $ENV{APPVEYOR_PULL_REQUEST_NUMBER} $ENV{PULL_REQUEST_NUM})
# get the event type, e.g. push, pull_request, api, cron, etc.
SET_DEFAULT_ARG1(EVENT_TYPE $ENV{TRAVIS_EVENT_TYPE} ${EVENT_TYPE})
if("${BRANCH}" STREQUAL "")
message(STATUS "Checked: environment variables for Travis, Appveyor, Jenkins (git plugin), BRANCH_NAME, BRANCH and 'git branch --show-current'")
message(FATAL_ERROR "Error! Git branch could not be determined. Please provide -DBRANCH=<name>")
endif()
#----------------------------------------------------------------------------------------#
#
# Set default values if not provided on command-line
#
#----------------------------------------------------------------------------------------#
SET_DEFAULT(SOURCE_DIR "${WORKING_DIR}") # source directory
SET_DEFAULT(BINARY_DIR "${WORKING_DIR}/build") # build directory
SET_DEFAULT(BUILD_TYPE "${CMAKE_BUILD_TYPE}") # Release, Debug, etc.
SET_DEFAULT(MODEL "Continuous") # Continuous, Nightly, or Experimental
SET_DEFAULT(JOBS 1) # number of parallel ctests
SET_DEFAULT(CTEST_COMMAND "${CMAKE_CTEST_COMMAND}") # just in case
SET_DEFAULT(CTEST_ARGS "-V --output-on-failure") # extra arguments when ctest is called
SET_DEFAULT(GIT_EXECUTABLE "git") # ctest_update
SET_DEFAULT(TARGET "all") # build target
SET_DEFAULT_ARG1(SITE "$ENV{SITE}"
"${HOSTNAME}") # update site
SET_DEFAULT_ARG1(BUILD_JOBS "$ENV{BUILD_JOBS}"
"${NUM_PROCESSORS}") # number of parallel compile jobs
#
# The variable below correspond to ctest arguments, i.e. START,END,STRIDE are
# '-I START,END,STRIDE'
#
SET_DEFAULT(START "")
SET_DEFAULT(END "")
SET_DEFAULT(STRIDE "")
SET_DEFAULT(INCLUDE "")
SET_DEFAULT(EXCLUDE "")
SET_DEFAULT(INCLUDE_LABEL "")
SET_DEFAULT(EXCLUDE_LABEL "")
SET_DEFAULT(PARALLEL_LEVEL "")
SET_DEFAULT(STOP_TIME "")
SET_DEFAULT(LABELS "")
SET_DEFAULT(NOTES "")
# Choose BUILD_TAG, the first bracketed component of the build name.
#
# The tag starts out as the branch name. For every model other than "Nightly"
# it is replaced by the first of the following that applies:
#   1. EVENT_TYPE and PULL_REQUEST_NUM are both set:
#        "<AUTHOR>/<EVENT_TYPE>/<PULL_REQUEST_NUM>" (AUTHOR part only if set)
#   2. SLUG is set:    "<SLUG>"
#   3. AUTHOR is set:  "<AUTHOR>/<BRANCH>"
# and, when EVENT_TYPE is set without a PULL_REQUEST_NUM, "-<EVENT_TYPE>" is
# appended to whatever was selected.
#
# default static build tag for Nightly
set(BUILD_TAG "${BRANCH}")
if(NOT BUILD_TYPE)
# default for kokkos if not specified
set(BUILD_TYPE "RelWithDebInfo")
endif()
# generate dynamic name if continuous or experimental model
if(NOT "${MODEL}" STREQUAL "Nightly")
if(EVENT_TYPE AND PULL_REQUEST_NUM)
# e.g. pull_request/123
if(AUTHOR)
set(BUILD_TAG "${AUTHOR}/${EVENT_TYPE}/${PULL_REQUEST_NUM}")
else()
set(BUILD_TAG "${EVENT_TYPE}/${PULL_REQUEST_NUM}")
endif()
elseif(SLUG)
# e.g. owner_name/repo_name
set(BUILD_TAG "${SLUG}")
elseif(AUTHOR)
set(BUILD_TAG "${AUTHOR}/${BRANCH}")
endif()
# an event without a pull request number is appended as a suffix
if(EVENT_TYPE AND NOT PULL_REQUEST_NUM)
set(BUILD_TAG "${BUILD_TAG}-${EVENT_TYPE}")
endif()
endif()
# unnecessary
string(REPLACE "/remotes/" "/" BUILD_TAG "${BUILD_TAG}")
string(REPLACE "/origin/" "/" BUILD_TAG "${BUILD_TAG}")
message(STATUS "BUILD_TAG: ${BUILD_TAG}")
set(BUILD_NAME "[${BUILD_TAG}] [${BUILD_NAME}-${BUILD_TYPE}]")
# colons in build name create extra (empty) entries in CDash
string(REPLACE ":" "-" BUILD_NAME "${BUILD_NAME}")
# unnecessary info
string(REPLACE "/merge]" "]" BUILD_NAME "${BUILD_NAME}")
# consistency
string(REPLACE "/pr/" "/pull/" BUILD_NAME "${BUILD_NAME}")
string(REPLACE "pull_request/" "pull/" BUILD_NAME "${BUILD_NAME}")
# miscellaneous from missing fields
string(REPLACE "--" "-" BUILD_NAME "${BUILD_NAME}")
string(REPLACE "-]" "]" BUILD_NAME "${BUILD_NAME}")
# check binary directory
if(EXISTS ${BINARY_DIR})
if(NOT IS_DIRECTORY "${BINARY_DIR}")
message(FATAL_ERROR "Error! '${BINARY_DIR}' already exists and is not a directory!")
endif()
file(GLOB BINARY_DIR_FILES "${BINARY_DIR}/*")
if(NOT "${BINARY_DIR_FILES}" STREQUAL "")
message(FATAL_ERROR "Error! '${BINARY_DIR}' already exists and is not empty!")
endif()
endif()
get_filename_component(SOURCE_REALDIR ${SOURCE_DIR} REALPATH)
get_filename_component(BINARY_REALDIR ${BINARY_DIR} REALPATH)
#----------------------------------------------------------------------------------------#
#
# Generate the CTestConfig.cmake
#
#----------------------------------------------------------------------------------------#
# Render every forwarded variable that has a non-empty value as one
#   set(<name> "<value>" CACHE <type> "")
# line, accumulating the lines in CONFIG_ARGS. The cache type recorded for the
# variable is used; when that type is UNINITIALIZED it is guessed from the
# value (ON/OFF -> BOOL, existing file -> FILEPATH, existing directory ->
# PATH, anything else -> STRING).
set(CONFIG_ARGS)
foreach(_ARG ${KOKKOS_CMAKE_ARGS})
    # variables without a value are not written out
    if("${${_ARG}}" STREQUAL "")
        continue()
    endif()

    get_property(_ARG_TYPE CACHE ${_ARG} PROPERTY TYPE)
    if("${_ARG_TYPE}" STREQUAL "UNINITIALIZED")
        if("${${_ARG}}" STREQUAL "ON" OR "${${_ARG}}" STREQUAL "OFF")
            set(_ARG_TYPE "BOOL")
        elseif(EXISTS "${${_ARG}}" AND NOT IS_DIRECTORY "${${_ARG}}")
            set(_ARG_TYPE "FILEPATH")
        elseif(EXISTS "${${_ARG}}" AND IS_DIRECTORY "${${_ARG}}")
            set(_ARG_TYPE "PATH")
        else()
            # the value is known to be non-empty at this point
            set(_ARG_TYPE "STRING")
        endif()
    endif()

    set(CONFIG_ARGS "${CONFIG_ARGS}set(${_ARG} \"${${_ARG}}\" CACHE ${_ARG_TYPE} \"\")\n")
endforeach()
file(WRITE ${BINARY_REALDIR}/initial-cache.cmake
"
set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS}\" CACHE STRING \"\")
${CONFIG_ARGS}
")
file(READ ${BINARY_REALDIR}/initial-cache.cmake _CACHE_INFO)
message(STATUS "Initial cache:\n${_CACHE_INFO}")
# initialize the cache
set(CONFIG_ARGS "-C ${BINARY_REALDIR}/initial-cache.cmake")
# generate the CTestConfig.cmake
configure_file(
${CMAKE_CURRENT_LIST_DIR}/CTestConfig.cmake.in
${BINARY_REALDIR}/CTestConfig.cmake
@ONLY)
# copy/generate the dashboard script
configure_file(
${CMAKE_CURRENT_LIST_DIR}/KokkosCTest.cmake.in
${BINARY_REALDIR}/KokkosCTest.cmake
@ONLY)
# custom CTest settings go in ${BINARY_DIR}/CTestCustom.cmake
execute_process(
COMMAND ${CMAKE_COMMAND} -E touch CTestCustom.cmake
WORKING_DIRECTORY ${BINARY_REALDIR}
)
#----------------------------------------------------------------------------------------#
#
# Execute CTest
#
#----------------------------------------------------------------------------------------#
message(STATUS "")
message(STATUS "BUILD_NAME: ${BUILD_NAME}")
message(STATUS "Executing '${CTEST_COMMAND} -S KokkosCTest.cmake ${CTEST_ARGS}'...")
message(STATUS "")
# e.g. -DCTEST_ARGS="--output-on-failure -VV" should really be -DCTEST_ARGS="--output-on-failure;-VV"
string(REPLACE " " ";" CTEST_ARGS "${CTEST_ARGS}")
execute_process(
COMMAND ${CTEST_COMMAND} -S KokkosCTest.cmake ${CTEST_ARGS}
RESULT_VARIABLE RET
WORKING_DIRECTORY ${BINARY_REALDIR}
)
# ensure that any non-zero result variable gets propagated
if(NOT RET EQUAL 0)
message(FATAL_ERROR "CTest return non-zero exit code: ${RET}")
endif()

View File

@ -1,261 +0,0 @@
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/CTestConfig.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/CTestConfig.cmake")
endif()
include(ProcessorCount)
ProcessorCount(CTEST_PROCESSOR_COUNT)
cmake_policy(SET CMP0009 NEW)
cmake_policy(SET CMP0011 NEW)
# ---------------------------------------------------------------------------- #
# -- Commands
# ---------------------------------------------------------------------------- #
find_program(CTEST_CMAKE_COMMAND NAMES cmake)
find_program(CTEST_UNAME_COMMAND NAMES uname)
find_program(CTEST_BZR_COMMAND NAMES bzr)
find_program(CTEST_CVS_COMMAND NAMES cvs)
find_program(CTEST_GIT_COMMAND NAMES git)
find_program(CTEST_HG_COMMAND NAMES hg)
find_program(CTEST_P4_COMMAND NAMES p4)
find_program(CTEST_SVN_COMMAND NAMES svn)
find_program(VALGRIND_COMMAND NAMES valgrind)
find_program(GCOV_COMMAND NAMES gcov)
find_program(LCOV_COMMAND NAMES llvm-cov)
find_program(MEMORYCHECK_COMMAND NAMES valgrind )
set(MEMORYCHECK_TYPE Valgrind)
# set(MEMORYCHECK_TYPE Purify)
# set(MEMORYCHECK_TYPE BoundsChecker)
# set(MEMORYCHECK_TYPE ThreadSanitizer)
# set(MEMORYCHECK_TYPE AddressSanitizer)
# set(MEMORYCHECK_TYPE LeakSanitizer)
# set(MEMORYCHECK_TYPE MemorySanitizer)
# set(MEMORYCHECK_TYPE UndefinedBehaviorSanitizer)
set(MEMORYCHECK_COMMAND_OPTIONS "--trace-children=yes --leak-check=full")
# ---------------------------------------------------------------------------- #
# -- Settings
# ---------------------------------------------------------------------------- #
## -- Process timeout in seconds
set(CTEST_TIMEOUT "7200")
## -- Set output to English
set(ENV{LC_MESSAGES} "en_EN" )
# ---------------------------------------------------------------------------- #
# -- Copy ctest configuration file
# ---------------------------------------------------------------------------- #
# copy_ctest_config_files()
#
# Make CTestConfig.cmake and CTestCustom.cmake available in the binary
# directory. Nothing is copied when source and binary directory are the same.
# Otherwise, for each of the two files:
#   - if this script does not live in the binary directory, the copy next to
#     this script (if any) is copied over;
#   - if it does, the copy in the source directory is used, but only when the
#     binary directory does not already contain the file.
macro(COPY_CTEST_CONFIG_FILES)
    foreach(_FILE CTestConfig.cmake CTestCustom.cmake)
        if("${CTEST_SOURCE_DIRECTORY}" STREQUAL "${CTEST_BINARY_DIRECTORY}")
            # in-source build: neither rule applies
            continue()
        endif()

        if(NOT "${CMAKE_CURRENT_LIST_DIR}" STREQUAL "${CTEST_BINARY_DIRECTORY}")
            # script lives outside the binary directory
            if(EXISTS ${CMAKE_CURRENT_LIST_DIR}/${_FILE})
                configure_file(${CMAKE_CURRENT_LIST_DIR}/${_FILE}
                    ${CTEST_BINARY_DIRECTORY}/${_FILE} COPYONLY)
            endif()
        elseif(EXISTS ${CTEST_SOURCE_DIRECTORY}/${_FILE} AND
               NOT EXISTS ${CTEST_BINARY_DIRECTORY}/${_FILE})
            # script lives in the binary directory: fall back to the source tree
            configure_file(${CTEST_SOURCE_DIRECTORY}/${_FILE}
                ${CTEST_BINARY_DIRECTORY}/${_FILE} COPYONLY)
        endif()
    endforeach()
endmacro()
ctest_read_custom_files("${CMAKE_CURRENT_LIST_DIR}")
message(STATUS "CTEST_MODEL: ${CTEST_MODEL}")
#-------------------------------------------------------------------------#
# Start
#
message(STATUS "")
message(STATUS "[${CTEST_BUILD_NAME}] Running START_CTEST stage...")
message(STATUS "")
ctest_start(${CTEST_MODEL} TRACK ${CTEST_MODEL} ${APPEND_CTEST}
${CTEST_SOURCE_DIRECTORY} ${CTEST_BINARY_DIRECTORY})
#-------------------------------------------------------------------------#
# Config
#
copy_ctest_config_files()
ctest_read_custom_files("${CTEST_BINARY_DIRECTORY}")
#-------------------------------------------------------------------------#
# Update
#
message(STATUS "")
message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_UPDATE stage...")
message(STATUS "")
ctest_update(SOURCE "${CTEST_SOURCE_DIRECTORY}"
RETURN_VALUE up_ret)
#-------------------------------------------------------------------------#
# Configure
#
message(STATUS "")
message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_CONFIGURE stage...")
message(STATUS "")
ctest_configure(BUILD "${CTEST_BINARY_DIRECTORY}"
SOURCE ${CTEST_SOURCE_DIRECTORY}
${APPEND_CTEST}
OPTIONS "${CTEST_CONFIGURE_OPTIONS}"
RETURN_VALUE config_ret)
#-------------------------------------------------------------------------#
# Echo the configure log so that it is visible in the script output
#
file(GLOB _configure_log "${CTEST_BINARY_DIRECTORY}/Testing/Temporary/LastConfigure*.log")
# should only have one but loop just for safety
foreach(_LOG ${_configure_log})
file(READ ${_LOG} _LOG_MESSAGE)
message(STATUS "Configure Log: ${_LOG}")
message(STATUS "\n${_LOG_MESSAGE}\n")
endforeach()
#-------------------------------------------------------------------------#
# Build
#
message(STATUS "")
message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_BUILD stage...")
message(STATUS "")
ctest_build(BUILD "${CTEST_BINARY_DIRECTORY}"
${APPEND_CTEST}
RETURN_VALUE build_ret)
#-------------------------------------------------------------------------#
# Echo the build log so that it is visible in the script output
#
file(GLOB _build_log "${CTEST_BINARY_DIRECTORY}/Testing/Temporary/LastBuild*.log")
# should only have one but loop just for safety
foreach(_LOG ${_build_log})
file(READ ${_LOG} _LOG_MESSAGE)
message(STATUS "Build Log: ${_LOG}")
message(STATUS "\n${_LOG_MESSAGE}\n")
endforeach()
#-------------------------------------------------------------------------#
# Test
#
message(STATUS "")
message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_TEST stage...")
message(STATUS "")
ctest_test(RETURN_VALUE test_ret
${APPEND_CTEST}
${START_CTEST}
${END_CTEST}
${STRIDE_CTEST}
${INCLUDE_CTEST}
${EXCLUDE_CTEST}
${INCLUDE_LABEL_CTEST}
${EXCLUDE_LABEL_CTEST}
${PARALLEL_LEVEL_CTEST}
${STOP_TIME_CTEST}
SCHEDULE_RANDOM OFF)
#-------------------------------------------------------------------------#
# Coverage
#
message(STATUS "")
message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_COVERAGE stage...")
message(STATUS "")
# Run the coverage tool once inside the binary directory (its stderr is
# suppressed and its exit status is not checked), then let CTest collect the
# coverage results; the outcome is stored in cov_ret.
execute_process(COMMAND ${CTEST_COVERAGE_COMMAND} ${CTEST_COVERAGE_EXTRA_FLAGS}
WORKING_DIRECTORY ${CTEST_BINARY_DIRECTORY}
ERROR_QUIET)
# NOTE(review): CTEST_COVERAGE_LABELS is not assigned anywhere in this script.
# Confirm that the included CTestConfig.cmake provides it, and that its value
# starts with the LABELS keyword that ctest_coverage() expects before labels.
ctest_coverage(${APPEND_CTEST}
${CTEST_COVERAGE_LABELS}
RETURN_VALUE cov_ret)
#-------------------------------------------------------------------------#
# MemCheck
#
message(STATUS "")
message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_MEMCHECK stage...")
message(STATUS "")
ctest_memcheck(RETURN_VALUE mem_ret
${APPEND_CTEST}
${START_CTEST}
${END_CTEST}
${STRIDE_CTEST}
${INCLUDE_CTEST}
${EXCLUDE_CTEST}
${INCLUDE_LABEL_CTEST}
${EXCLUDE_LABEL_CTEST}
${PARALLEL_LEVEL_CTEST})
#-------------------------------------------------------------------------#
# Submit
#
message(STATUS "")
message(STATUS "[${CTEST_BUILD_NAME}] Running CTEST_SUBMIT stage...")
message(STATUS "")
file(GLOB_RECURSE NOTE_FILES "${CTEST_BINARY_DIRECTORY}/*CTestNotes.cmake")
foreach(_FILE ${NOTE_FILES})
message(STATUS "Including CTest notes files: \"${_FILE}\"...")
include("${_FILE}")
endforeach()
# capture submit error so it doesn't fail because of a submission error
ctest_submit(RETURN_VALUE submit_ret
RETRY_COUNT 2
RETRY_DELAY 10
CAPTURE_CMAKE_ERROR submit_err)
#-------------------------------------------------------------------------#
# Summary
#
message(STATUS "")
message(STATUS "[${CTEST_BUILD_NAME}] Finished ${CTEST_MODEL} Stages (${STAGES})")
message(STATUS "")
#-------------------------------------------------------------------------#
# Non-zero exit codes for important errors
#
if(NOT config_ret EQUAL 0)
message(FATAL_ERROR "Error during configuration! Exit code: ${config_ret}")
endif()
if(NOT build_ret EQUAL 0)
message(FATAL_ERROR "Error during build! Exit code: ${build_ret}")
endif()
if(NOT test_ret EQUAL 0)
message(FATAL_ERROR "Error during testing! Exit code: ${test_ret}")
endif()

View File

@ -41,6 +41,7 @@
#cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA
#cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR
#cmakedefine KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_ENABLE_HPX_ASYNC_DISPATCH
#cmakedefine KOKKOS_ENABLE_DEBUG
@ -49,17 +50,21 @@
#cmakedefine KOKKOS_ENABLE_COMPILER_WARNINGS
#cmakedefine KOKKOS_ENABLE_PROFILING_LOAD_PRINT
#cmakedefine KOKKOS_ENABLE_TUNING
#cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE
#cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE_3
#cmakedefine KOKKOS_ENABLE_DEPRECATION_WARNINGS
#cmakedefine KOKKOS_ENABLE_LARGE_MEM_TESTS
#cmakedefine KOKKOS_ENABLE_DUALVIEW_MODIFY_CHECK
#cmakedefine KOKKOS_ENABLE_COMPLEX_ALIGN
#cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION
#cmakedefine KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
#cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION // deprecated
#cmakedefine KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION
/* TPL Settings */
#cmakedefine KOKKOS_ENABLE_HWLOC
#cmakedefine KOKKOS_USE_LIBRT
#cmakedefine KOKKOS_ENABLE_HBWSPACE
#cmakedefine KOKKOS_ENABLE_LIBDL
#cmakedefine KOKKOS_ENABLE_LIBQUADMATH
#cmakedefine KOKKOS_IMPL_CUDA_CLANG_WORKAROUND
#cmakedefine KOKKOS_COMPILER_CUDA_VERSION @KOKKOS_COMPILER_CUDA_VERSION@
@ -79,6 +84,12 @@
#cmakedefine KOKKOS_ARCH_POWER8
#cmakedefine KOKKOS_ARCH_POWER9
#cmakedefine KOKKOS_ARCH_INTEL_GEN
#cmakedefine KOKKOS_ARCH_INTEL_DG1
#cmakedefine KOKKOS_ARCH_INTEL_GEN9
#cmakedefine KOKKOS_ARCH_INTEL_GEN11
#cmakedefine KOKKOS_ARCH_INTEL_GEN12LP
#cmakedefine KOKKOS_ARCH_INTEL_XEHP
#cmakedefine KOKKOS_ARCH_INTEL_GPU
#cmakedefine KOKKOS_ARCH_KEPLER
#cmakedefine KOKKOS_ARCH_KEPLER30
#cmakedefine KOKKOS_ARCH_KEPLER32
@ -95,6 +106,7 @@
#cmakedefine KOKKOS_ARCH_VOLTA70
#cmakedefine KOKKOS_ARCH_VOLTA72
#cmakedefine KOKKOS_ARCH_TURING75
#cmakedefine KOKKOS_ARCH_AMPERE
#cmakedefine KOKKOS_ARCH_AMPERE80
#cmakedefine KOKKOS_ARCH_AMPERE86
#cmakedefine KOKKOS_ARCH_AMD_ZEN

View File

@ -29,7 +29,12 @@ ELSE()
ENDIF()
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA DEFAULT_MSG FOUND_CUDART FOUND_CUDA_DRIVER)
IF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL PGI)
SET(KOKKOS_CUDA_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1")
ELSE()
SET(KOKKOS_CUDA_ERROR DEFAULT_MSG)
ENDIF()
FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUDA ${KOKKOS_CUDA_ERROR} FOUND_CUDART FOUND_CUDA_DRIVER)
IF (FOUND_CUDA_DRIVER AND FOUND_CUDART)
KOKKOS_CREATE_IMPORTED_TPL(CUDA INTERFACE
LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart

View File

@ -0,0 +1 @@
# Look up the LIBQUADMATH TPL by its header (quadmath.h) and its library
# (quadmath). NOTE(review): presumably this registers an imported target for
# the TPL -- confirm against the definition of KOKKOS_FIND_IMPORTED.
KOKKOS_FIND_IMPORTED(LIBQUADMATH HEADER quadmath.h LIBRARY quadmath)

View File

@ -0,0 +1,46 @@
# @HEADER
# ************************************************************************
#
# Kokkos v. 3.0
# Copyright (2020) National Technology & Engineering
# Solutions of Sandia, LLC (NTESS).
#
# Under the terms of Contract DE-NA0003525 with NTESS,
# the U.S. Government retains certain rights in this software.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Corporation nor the names of the
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Questions? Contact Christian R. Trott (crtrott@sandia.gov)
#
# ************************************************************************
# @HEADER
# Declare what the quadmath TPL needs: the header quadmath.h and a library
# named quadmath.
KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( quadmath
REQUIRED_HEADERS quadmath.h
REQUIRED_LIBS_NAMES quadmath
)

View File

@ -67,8 +67,13 @@ KOKKOS_ARCH_OPTION(ZEN3 HOST "AMD Zen3 architecture")
# GPU architecture options for AMD and Intel devices. The last argument is the
# human-readable description attached to the option.
KOKKOS_ARCH_OPTION(VEGA900 GPU "AMD GPU MI25 GFX900")
KOKKOS_ARCH_OPTION(VEGA906 GPU "AMD GPU MI50/MI60 GFX906")
KOKKOS_ARCH_OPTION(VEGA908 GPU "AMD GPU MI100 GFX908")
# Previously declared with an empty description, unlike its siblings.
KOKKOS_ARCH_OPTION(VEGA90A GPU "AMD GPU MI200 GFX90A")
KOKKOS_ARCH_OPTION(INTEL_GEN GPU "Intel GPUs Gen9+")
KOKKOS_ARCH_OPTION(INTEL_DG1 GPU "Intel Iris XeMAX GPU")
KOKKOS_ARCH_OPTION(INTEL_GEN9 GPU "Intel GPU Gen9")
KOKKOS_ARCH_OPTION(INTEL_GEN11 GPU "Intel GPU Gen11")
KOKKOS_ARCH_OPTION(INTEL_GEN12LP GPU "Intel GPU Gen12LP")
KOKKOS_ARCH_OPTION(INTEL_XEHP GPU "Intel GPU Xe-HP")
IF(KOKKOS_ENABLE_COMPILER_WARNINGS)
@ -76,6 +81,12 @@ IF(KOKKOS_ENABLE_COMPILER_WARNINGS)
"-Wall" "-Wunused-parameter" "-Wshadow" "-pedantic"
"-Wsign-compare" "-Wtype-limits" "-Wuninitialized")
# NOTE KOKKOS_ prefixed variable (all uppercase) is not set yet because TPLs are processed after ARCH
IF(Kokkos_ENABLE_LIBQUADMATH)
# warning: non-standard suffix on floating constant [-Wpedantic]
LIST(REMOVE_ITEM COMMON_WARNINGS "-pedantic")
ENDIF()
# OpenMPTarget compilers give erroneous warnings about sign comparison in loops
IF(KOKKOS_ENABLE_OPENMPTARGET)
LIST(REMOVE_ITEM COMMON_WARNINGS "-Wsign-compare")
@ -86,7 +97,7 @@ IF(KOKKOS_ENABLE_COMPILER_WARNINGS)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID CMAKE_CXX_COMPILER_ID
PGI NO-VALUE-SPECIFIED
NVHPC NO-VALUE-SPECIFIED
GNU ${GNU_WARNINGS}
DEFAULT ${COMMON_WARNINGS}
)
@ -158,16 +169,18 @@ ENDIF()
IF (KOKKOS_ARCH_ARMV80)
COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED
PGI NO-VALUE-SPECIFIED
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Cray NO-VALUE-SPECIFIED
NVHPC NO-VALUE-SPECIFIED
DEFAULT -march=armv8-a
)
ENDIF()
IF (KOKKOS_ARCH_ARMV81)
COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED
PGI NO-VALUE-SPECIFIED
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Cray NO-VALUE-SPECIFIED
NVHPC NO-VALUE-SPECIFIED
DEFAULT -march=armv8.1-a
)
ENDIF()
@ -175,8 +188,9 @@ ENDIF()
IF (KOKKOS_ARCH_ARMV8_THUNDERX)
SET(KOKKOS_ARCH_ARMV80 ON) #Not a cache variable
COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED
PGI NO-VALUE-SPECIFIED
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Cray NO-VALUE-SPECIFIED
NVHPC NO-VALUE-SPECIFIED
DEFAULT -march=armv8-a -mtune=thunderx
)
ENDIF()
@ -184,23 +198,28 @@ ENDIF()
IF (KOKKOS_ARCH_ARMV8_THUNDERX2)
SET(KOKKOS_ARCH_ARMV81 ON) #Not a cache variable
COMPILER_SPECIFIC_FLAGS(
Cray NO-VALUE-SPECIFIED
PGI NO-VALUE-SPECIFIED
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Cray NO-VALUE-SPECIFIED
NVHPC NO-VALUE-SPECIFIED
DEFAULT -mcpu=thunderx2t99 -mtune=thunderx2t99
)
ENDIF()
IF (KOKKOS_ARCH_A64FX)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
NVHPC NO-VALUE-SPECIFIED
DEFAULT -march=armv8.2-a+sve
Clang -march=armv8.2-a+sve -msve-vector-bits=512
GCC -march=armv8.2-a+sve -msve-vector-bits=512
Clang -march=armv8.2-a+sve -msve-vector-bits=512
GCC -march=armv8.2-a+sve -msve-vector-bits=512
)
ENDIF()
IF (KOKKOS_ARCH_ZEN)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Intel -mavx2
NVHPC -tp=zen
DEFAULT -march=znver1 -mtune=znver1
)
SET(KOKKOS_ARCH_AMD_ZEN ON)
@ -209,7 +228,9 @@ ENDIF()
IF (KOKKOS_ARCH_ZEN2)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Intel -mavx2
NVHPC -tp=zen2
DEFAULT -march=znver2 -mtune=znver2
)
SET(KOKKOS_ARCH_AMD_ZEN2 ON)
@ -218,7 +239,9 @@ ENDIF()
IF (KOKKOS_ARCH_ZEN3)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Intel -mavx2
NVHPC -tp=zen2
DEFAULT -march=znver3 -mtune=znver3
)
SET(KOKKOS_ARCH_AMD_ZEN3 ON)
@ -227,8 +250,9 @@ ENDIF()
IF (KOKKOS_ARCH_WSM)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Intel -xSSE4.2
PGI -tp=nehalem
NVHPC -tp=px
Cray NO-VALUE-SPECIFIED
DEFAULT -msse4.2
)
@ -238,8 +262,9 @@ ENDIF()
IF (KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX)
SET(KOKKOS_ARCH_AVX ON)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Intel -mavx
PGI -tp=sandybridge
NVHPC -tp=sandybridge
Cray NO-VALUE-SPECIFIED
DEFAULT -mavx
)
@ -248,8 +273,9 @@ ENDIF()
IF (KOKKOS_ARCH_HSW)
SET(KOKKOS_ARCH_AVX2 ON)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Intel -xCORE-AVX2
PGI -tp=haswell
NVHPC -tp=haswell
Cray NO-VALUE-SPECIFIED
DEFAULT -march=core-avx2 -mtune=core-avx2
)
@ -258,8 +284,9 @@ ENDIF()
IF (KOKKOS_ARCH_BDW)
SET(KOKKOS_ARCH_AVX2 ON)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Intel -xCORE-AVX2
PGI -tp=haswell
NVHPC -tp=haswell
Cray NO-VALUE-SPECIFIED
DEFAULT -march=core-avx2 -mtune=core-avx2 -mrtm
)
@ -269,8 +296,9 @@ IF (KOKKOS_ARCH_KNL)
#avx512-mic
SET(KOKKOS_ARCH_AVX512MIC ON) #not a cache variable
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Intel -xMIC-AVX512
PGI NO-VALUE-SPECIFIED
NVHPC -tp=knl
Cray NO-VALUE-SPECIFIED
DEFAULT -march=knl -mtune=knl
)
@ -279,6 +307,7 @@ ENDIF()
IF (KOKKOS_ARCH_KNC)
SET(KOKKOS_USE_ISA_KNC ON)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
DEFAULT -mmic
)
ENDIF()
@ -287,8 +316,9 @@ IF (KOKKOS_ARCH_SKX)
#avx512-xeon
SET(KOKKOS_ARCH_AVX512XEON ON)
COMPILER_SPECIFIC_FLAGS(
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Intel -xCORE-AVX512
PGI NO-VALUE-SPECIFIED
NVHPC -tp=skylake
Cray NO-VALUE-SPECIFIED
DEFAULT -march=skylake-avx512 -mtune=skylake-avx512 -mrtm
)
@ -304,7 +334,8 @@ ENDIF()
IF (KOKKOS_ARCH_POWER7)
COMPILER_SPECIFIC_FLAGS(
PGI NO-VALUE-SPECIFIED
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
NVHPC NO-VALUE-SPECIFIED
DEFAULT -mcpu=power7 -mtune=power7
)
SET(KOKKOS_USE_ISA_POWERPCBE ON)
@ -312,16 +343,16 @@ ENDIF()
IF (KOKKOS_ARCH_POWER8)
COMPILER_SPECIFIC_FLAGS(
PGI NO-VALUE-SPECIFIED
NVIDIA NO-VALUE-SPECIFIED
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
NVHPC -tp=pwr8
DEFAULT -mcpu=power8 -mtune=power8
)
ENDIF()
IF (KOKKOS_ARCH_POWER9)
COMPILER_SPECIFIC_FLAGS(
PGI NO-VALUE-SPECIFIED
NVIDIA NO-VALUE-SPECIFIED
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
NVHPC -tp=pwr9
DEFAULT -mcpu=power9 -mtune=power9
)
ENDIF()
@ -368,7 +399,7 @@ ENDIF()
IF (KOKKOS_ENABLE_SYCL)
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fsycl
DEFAULT -fsycl -fno-sycl-id-queries-fit-in-int
)
COMPILER_SPECIFIC_OPTIONS(
DEFAULT -fsycl-unnamed-lambda
@ -443,20 +474,58 @@ ENDFUNCTION()
CHECK_AMDGPU_ARCH(VEGA900 gfx900) # Radeon Instinct MI25
CHECK_AMDGPU_ARCH(VEGA906 gfx906) # Radeon Instinct MI50 and MI60
CHECK_AMDGPU_ARCH(VEGA908 gfx908)
CHECK_AMDGPU_ARCH(VEGA90A gfx90a)
IF(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED)
MESSAGE(SEND_ERROR "HIP enabled but no AMD GPU architecture currently enabled. "
"Please enable one AMD GPU architecture via -DKokkos_ARCH_{..}=ON'.")
IF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
FIND_PROGRAM(ROCM_ENUMERATOR rocm_agent_enumerator)
EXECUTE_PROCESS(COMMAND ${ROCM_ENUMERATOR} OUTPUT_VARIABLE GPU_ARCHS)
STRING(LENGTH "${GPU_ARCHS}" len_str)
# enumerator always output gfx000 as the first line
IF(${len_str} LESS 8)
MESSAGE(SEND_ERROR "HIP enabled but no AMD GPU architecture currently enabled. "
"Please enable one AMD GPU architecture via -DKokkos_ARCH_{..}=ON'.")
ENDIF()
ELSE()
MESSAGE(SEND_ERROR "HIP enabled but no AMD GPU architecture currently enabled. "
"Please enable one AMD GPU architecture via -DKokkos_ARCH_{..}=ON'.")
ENDIF()
ENDIF()
# CHECK_MULTIPLE_INTEL_ARCH()
#
# Record that an Intel GPU architecture has been selected by switching
# KOKKOS_ARCH_INTEL_GPU on. Calling it again while that flag is already on is
# a fatal error, which is how a second selected architecture is detected.
MACRO(CHECK_MULTIPLE_INTEL_ARCH)
  IF(NOT KOKKOS_ARCH_INTEL_GPU)
    SET(KOKKOS_ARCH_INTEL_GPU ON)
  ELSE()
    MESSAGE(FATAL_ERROR "Specifying multiple Intel GPU architectures is not allowed!")
  ENDIF()
ENDMACRO()
# Run the multiple-architecture check once for every enabled Intel GPU
# architecture, in the order GEN, DG1, GEN9, GEN11, GEN12LP, XEHP.
FOREACH(_INTEL_ARCH GEN DG1 GEN9 GEN11 GEN12LP XEHP)
  IF(KOKKOS_ARCH_INTEL_${_INTEL_ARCH})
    CHECK_MULTIPLE_INTEL_ARCH()
  ENDIF()
ENDFOREACH()
IF (KOKKOS_ENABLE_OPENMPTARGET)
SET(CLANG_CUDA_ARCH ${KOKKOS_CUDA_ARCH_FLAG})
IF (CLANG_CUDA_ARCH)
STRING(REPLACE "sm_" "cc" PGI_CUDA_ARCH ${CLANG_CUDA_ARCH})
STRING(REPLACE "sm_" "cc" NVHPC_CUDA_ARCH ${CLANG_CUDA_ARCH})
COMPILER_SPECIFIC_FLAGS(
Clang -Xopenmp-target -march=${CLANG_CUDA_ARCH} -fopenmp-targets=nvptx64-nvidia-cuda
XL -qtgtarch=${KOKKOS_CUDA_ARCH_FLAG}
PGI -gpu=${PGI_CUDA_ARCH}
XL -qtgtarch=${KOKKOS_CUDA_ARCH_FLAG}
NVHPC -gpu=${NVHPC_CUDA_ARCH}
)
ENDIF()
SET(CLANG_AMDGPU_ARCH ${KOKKOS_AMDGPU_ARCH_FLAG})
@ -465,7 +534,7 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
Clang -Xopenmp-target=amdgcn-amd-amdhsa -march=${CLANG_AMDGPU_ARCH} -fopenmp-targets=amdgcn-amd-amdhsa
)
ENDIF()
IF (KOKKOS_ARCH_INTEL_GEN)
IF (KOKKOS_ARCH_INTEL_GPU)
COMPILER_SPECIFIC_FLAGS(
IntelLLVM -fopenmp-targets=spir64 -D__STRICT_ANSI__
)
@ -485,7 +554,27 @@ IF (KOKKOS_ENABLE_SYCL)
ENDIF()
ELSEIF(KOKKOS_ARCH_INTEL_GEN)
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device skl"
DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9-"
)
ELSEIF(KOKKOS_ARCH_INTEL_GEN9)
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9"
)
ELSEIF(KOKKOS_ARCH_INTEL_GEN11)
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen11"
)
ELSEIF(KOKKOS_ARCH_INTEL_GEN12LP)
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen12lp"
)
ELSEIF(KOKKOS_ARCH_INTEL_DG1)
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device dg1"
)
ELSEIF(KOKKOS_ARCH_INTEL_XEHP)
COMPILER_SPECIFIC_FLAGS(
DEFAULT -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device xehp"
)
ENDIF()
ENDIF()

View File

@ -137,7 +137,7 @@ SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 4.0.0 or higher"
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 5.3.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 17.0.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 9.2.88 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC 3.8.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n HIPCC 4.2.0 or higher")
SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.4 or higher\n")
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
@ -158,13 +158,23 @@ ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
ENDIF()
SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Kokkos turns off CXX extensions" FORCE)
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 3.8.0)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 4.2.0)
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI)
IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 17.4)
MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
ENDIF()
# Treat PGI internally as NVHPC to simplify handling both compilers.
# Before CMake 3.20 NVHPC was identified as PGI, nvc++ is
# backward-compatible to pgc++.
SET(KOKKOS_CXX_COMPILER_ID NVHPC CACHE STRING INTERNAL FORCE)
ENDIF()
# Settle the host compiler ID:
#  - when KOKKOS_CXX_HOST_COMPILER_ID is not defined at all, it takes the value
#    of KOKKOS_CXX_COMPILER_ID (a normal, non-cache variable);
#  - when it is defined and equals PGI, it is overwritten in the cache with
#    NVHPC.
# NOTE(review): in set(<var> <value> CACHE <type> <docstring> [FORCE]) the word
# INTERNAL below sits in the <docstring> position, so the entry type is STRING
# and "INTERNAL" is only its help text - confirm this is intended.
IF(NOT DEFINED KOKKOS_CXX_HOST_COMPILER_ID)
SET(KOKKOS_CXX_HOST_COMPILER_ID ${KOKKOS_CXX_COMPILER_ID})
ELSEIF(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL PGI)
SET(KOKKOS_CXX_HOST_COMPILER_ID NVHPC CACHE STRING INTERNAL FORCE)
ENDIF()
STRING(REPLACE "." ";" VERSION_LIST ${KOKKOS_CXX_COMPILER_VERSION})

View File

@ -62,7 +62,7 @@ IF(KOKKOS_ENABLE_OPENMP)
COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Clang -Xcompiler ${ClangOpenMPFlag}
IntelLLVM -Xcompiler -fiopenmp
PGI -Xcompiler -mp
NVHPC -Xcompiler -mp
Cray NO-VALUE-SPECIFIED
XL -Xcompiler -qsmp=omp
DEFAULT -Xcompiler -fopenmp
@ -72,7 +72,7 @@ IF(KOKKOS_ENABLE_OPENMP)
Clang ${ClangOpenMPFlag}
IntelLLVM -fiopenmp
AppleClang -Xpreprocessor -fopenmp
PGI -mp
NVHPC -mp
Cray NO-VALUE-SPECIFIED
XL -qsmp=omp
DEFAULT -fopenmp
@ -94,7 +94,7 @@ IF (KOKKOS_ENABLE_OPENMPTARGET)
Clang ${ClangOpenMPFlag} -Wno-openmp-mapping
IntelLLVM -fiopenmp -Wno-openmp-mapping
XL -qsmp=omp -qoffload -qnoeh
PGI -mp=gpu
NVHPC -mp=gpu
DEFAULT -fopenmp
)
COMPILER_SPECIFIC_DEFS(

View File

@ -26,9 +26,16 @@ KOKKOS_CFG_DEPENDS(OPTIONS COMPILER_ID)
# Put a check in just in case people are using this option
KOKKOS_DEPRECATED_LIST(OPTIONS ENABLE)
# Set the Default for Desul Atomics usage.
set(_DESUL_ATOMICS_DEFAULT ON)
KOKKOS_ENABLE_OPTION(CUDA_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for CUDA")
KOKKOS_ENABLE_OPTION(CUDA_UVM OFF "Whether to use unified memory (UM) for CUDA by default")
KOKKOS_ENABLE_OPTION(CUDA_LDG_INTRINSIC OFF "Whether to use CUDA LDG intrinsics")
# As of 08/12/2021 CudaMallocAsync causes issues if UCX is used as MPI communication layer.
KOKKOS_ENABLE_OPTION(IMPL_CUDA_MALLOC_ASYNC OFF "Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2)")
KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_3 ON "Whether code deprecated in major release 3 is available" )
KOKKOS_ENABLE_OPTION(DEPRECATION_WARNINGS ON "Whether to emit deprecation warnings" )
KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP")
KOKKOS_ENABLE_OPTION(HPX_ASYNC_DISPATCH OFF "Whether HPX supports asynchronous dispatch")
KOKKOS_ENABLE_OPTION(TESTS OFF "Whether to build the unit tests")
@ -50,6 +57,9 @@ KOKKOS_ENABLE_OPTION(TUNING OFF "Whether to create bindings for tu
KOKKOS_ENABLE_OPTION(AGGRESSIVE_VECTORIZATION OFF "Whether to aggressively vectorize loops")
KOKKOS_ENABLE_OPTION(LAUNCH_COMPILER ON "Whether to potentially use the launch compiler")
# This option will go away eventually, but allows fallback to old implementation when needed.
KOKKOS_ENABLE_OPTION(IMPL_DESUL_ATOMICS ON "Whether to use desul based atomics - option only during beta")
# When the CUDA backend is enabled, build KOKKOS_COMPILER_CUDA_VERSION by
# concatenating the compiler's major and minor version components with no
# separator between them (major "11" and minor "2" would give "112").
IF (KOKKOS_ENABLE_CUDA)
SET(KOKKOS_COMPILER_CUDA_VERSION "${KOKKOS_COMPILER_VERSION_MAJOR}${KOKKOS_COMPILER_VERSION_MINOR}")
ENDIF()

View File

@ -773,7 +773,7 @@ FUNCTION(kokkos_link_tpl TARGET)
ENDFUNCTION()
FUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER)
SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang IntelLLVM GNU HIPCC Fujitsu)
SET(COMPILERS NVIDIA NVHPC XL XLClang DEFAULT Cray Intel Clang AppleClang IntelLLVM GNU HIPCC Fujitsu)
CMAKE_PARSE_ARGUMENTS(
PARSE
"LINK_OPTIONS;COMPILE_OPTIONS;COMPILE_DEFINITIONS;LINK_LIBRARIES"

View File

@ -140,7 +140,7 @@ IF (NOT KOKKOS_CXX_STANDARD_FEATURE)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/cray.cmake)
kokkos_set_cray_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI)
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
INCLUDE(${KOKKOS_SRC_PATH}/cmake/pgi.cmake)
kokkos_set_pgi_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)

View File

@ -67,6 +67,12 @@ SET(PTHREAD_DEFAULT OFF)
ENDIF()
KOKKOS_TPL_OPTION(PTHREAD ${PTHREAD_DEFAULT} TRIBITS Pthread)
# libquadmath is off by default; it defaults to on only when Kokkos is being
# built as part of Trilinos and Trilinos has its quadmath TPL enabled.
SET(LIBQUADMATH_DEFAULT OFF)
IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_quadmath)
  SET(LIBQUADMATH_DEFAULT ON)
ENDIF()
KOKKOS_TPL_OPTION(LIBQUADMATH ${LIBQUADMATH_DEFAULT} TRIBITS quadmath)
#Make sure we use our local FindKokkosCuda.cmake
KOKKOS_IMPORT_TPL(HPX INTERFACE)
@ -78,6 +84,7 @@ KOKKOS_IMPORT_TPL(LIBDL)
KOKKOS_IMPORT_TPL(MEMKIND)
KOKKOS_IMPORT_TPL(PTHREAD INTERFACE)
KOKKOS_IMPORT_TPL(ROCM INTERFACE)
KOKKOS_IMPORT_TPL(LIBQUADMATH)
#Convert list to newlines (which CMake doesn't always like in cache variables)
STRING(REPLACE ";" "\n" KOKKOS_TPL_EXPORT_TEMP "${KOKKOS_TPL_EXPORTS}")

View File

@ -0,0 +1,46 @@
# @HEADER
# ************************************************************************
#
# Kokkos v. 3.0
# Copyright (2020) National Technology & Engineering
# Solutions of Sandia, LLC (NTESS).
#
# Under the terms of Contract DE-NA0003525 with NTESS,
# the U.S. Government retains certain rights in this software.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the Corporation nor the names of the
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Questions? Contact Christian R. Trott (crtrott@sandia.gov)
#
# ************************************************************************
# @HEADER
# TriBITS find module for the "quadmath" third-party library: the TPL is
# described by one required header (quadmath.h) and one required library name
# (quadmath).
# NOTE(review): the search and error behaviour is whatever
# TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES implements; it is not defined in
# this file.
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( quadmath
REQUIRED_HEADERS quadmath.h
REQUIRED_LIBS_NAMES quadmath
)

View File

@ -48,7 +48,7 @@
#include <Kokkos_DynRankView.hpp>
#include <vector>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
// Compare performance of DynRankView to View, specific focus on the parenthesis
// operators

View File

@ -48,7 +48,7 @@
#include <vector>
#include <algorithm>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
// This test will simulate global ids

View File

@ -46,7 +46,7 @@
#define KOKKOS_TEST_SCATTER_VIEW_HPP
#include <Kokkos_ScatterView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
namespace Perf {

View File

@ -43,7 +43,7 @@
#ifndef KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
#define KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
#include <iostream>
#include <iomanip>

View File

@ -76,20 +76,25 @@ class Bitset {
using execution_space = Device;
using size_type = unsigned int;
enum { BIT_SCAN_REVERSE = 1u };
enum { MOVE_HINT_BACKWARD = 2u };
static constexpr unsigned BIT_SCAN_REVERSE = 1u;
static constexpr unsigned MOVE_HINT_BACKWARD = 2u;
enum {
BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u,
BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE,
BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD,
BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD
};
static constexpr unsigned BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u;
static constexpr unsigned BIT_SCAN_REVERSE_MOVE_HINT_FORWARD =
BIT_SCAN_REVERSE;
static constexpr unsigned BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD =
MOVE_HINT_BACKWARD;
static constexpr unsigned BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD =
BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD;
private:
enum { block_size = static_cast<unsigned>(sizeof(unsigned) * CHAR_BIT) };
enum { block_mask = block_size - 1u };
enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
enum : unsigned {
block_size = static_cast<unsigned>(sizeof(unsigned) * CHAR_BIT)
};
enum : unsigned { block_mask = block_size - 1u };
enum : unsigned {
block_shift = Kokkos::Impl::integral_power_of_two(block_size)
};
public:
/// constructor
@ -317,14 +322,18 @@ class ConstBitset {
enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
public:
KOKKOS_FUNCTION
ConstBitset() : m_size(0) {}
KOKKOS_FUNCTION
ConstBitset(Bitset<Device> const& rhs)
: m_size(rhs.m_size), m_blocks(rhs.m_blocks) {}
KOKKOS_FUNCTION
ConstBitset(ConstBitset<Device> const& rhs)
: m_size(rhs.m_size), m_blocks(rhs.m_blocks) {}
KOKKOS_FUNCTION
ConstBitset<Device>& operator=(Bitset<Device> const& rhs) {
this->m_size = rhs.m_size;
this->m_blocks = rhs.m_blocks;
@ -332,6 +341,7 @@ class ConstBitset {
return *this;
}
KOKKOS_FUNCTION
ConstBitset<Device>& operator=(ConstBitset<Device> const& rhs) {
this->m_size = rhs.m_size;
this->m_blocks = rhs.m_blocks;

View File

@ -597,8 +597,10 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
}
if (std::is_same<typename t_host::memory_space,
typename t_dev::memory_space>::value) {
typename t_dev::execution_space().fence();
typename t_host::execution_space().fence();
typename t_dev::execution_space().fence(
"Kokkos::DualView<>::sync: fence after syncing DualView");
typename t_host::execution_space().fence(
"Kokkos::DualView<>::sync: fence after syncing DualView");
}
}
@ -776,10 +778,11 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
/// If \c Device is the same as this DualView's device type, then
/// mark the device's data as modified. Otherwise, mark the host's
/// data as modified.
template <class Device>
template <class Device, class Dummy = DualView,
std::enable_if_t<!Dummy::impl_dualview_is_single_device::value>* =
nullptr>
void modify() {
if (modified_flags.data() == nullptr) return;
if (impl_dualview_is_single_device::value) return;
int dev = get_device_side<Device>();
if (dev == 1) { // if Device is the same as DualView's device type
@ -811,8 +814,17 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
#endif
}
/// Overload of modify<Device>() that takes part in overload resolution only
/// when impl_dualview_is_single_device::value is true. It is a no-op.
template <class Device, class Dummy = DualView,
          std::enable_if_t<Dummy::impl_dualview_is_single_device::value>* =
              nullptr>
void modify() {}
template <class Dummy = DualView,
std::enable_if_t<!Dummy::impl_dualview_is_single_device::value>* =
nullptr>
inline void modify_host() {
if (impl_dualview_is_single_device::value) return;
if (modified_flags.data() != nullptr) {
modified_flags(0) =
(modified_flags(1) > modified_flags(0) ? modified_flags(1)
@ -832,8 +844,17 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
}
}
/// Overload of modify_host() that takes part in overload resolution only when
/// impl_dualview_is_single_device::value is true. It is a no-op.
template <class Dummy = DualView,
          std::enable_if_t<Dummy::impl_dualview_is_single_device::value>* =
              nullptr>
inline void modify_host() {}
template <class Dummy = DualView,
std::enable_if_t<!Dummy::impl_dualview_is_single_device::value>* =
nullptr>
inline void modify_device() {
if (impl_dualview_is_single_device::value) return;
if (modified_flags.data() != nullptr) {
modified_flags(1) =
(modified_flags(1) > modified_flags(0) ? modified_flags(1)
@ -853,6 +874,13 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
}
}
/// Overload of modify_device() that takes part in overload resolution only
/// when impl_dualview_is_single_device::value is true. It is a no-op.
template <class Dummy = DualView,
          std::enable_if_t<Dummy::impl_dualview_is_single_device::value>* =
              nullptr>
inline void modify_device() {}
inline void clear_sync_state() {
if (modified_flags.data() != nullptr)
modified_flags(1) = modified_flags(0) = 0;
@ -875,8 +903,15 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
::Kokkos::realloc(d_view, n0, n1, n2, n3, n4, n5, n6, n7);
h_view = create_mirror_view(d_view);
const size_t new_extents[8] = {n0, n1, n2, n3, n4, n5, n6, n7};
const bool sizeMismatch =
Impl::size_mismatch(h_view, h_view.rank_dynamic, new_extents);
if (sizeMismatch) {
::Kokkos::realloc(d_view, n0, n1, n2, n3, n4, n5, n6, n7);
h_view = create_mirror_view(d_view);
} else
::Kokkos::deep_copy(d_view, typename t_dev::value_type{});
/* Reset dirty flags */
if (modified_flags.data() == nullptr) {
@ -897,41 +932,31 @@ class DualView : public ViewTraits<DataType, Arg1Type, Arg2Type, Arg3Type> {
const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG,
const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) {
const size_t new_extents[8] = {n0, n1, n2, n3, n4, n5, n6, n7};
const bool sizeMismatch =
Impl::size_mismatch(h_view, h_view.rank_dynamic, new_extents);
if (modified_flags.data() == nullptr) {
modified_flags = t_modified_flags("DualView::modified_flags");
}
if (modified_flags(1) >= modified_flags(0)) {
/* Resize on Device */
::Kokkos::resize(d_view, n0, n1, n2, n3, n4, n5, n6, n7);
h_view = create_mirror_view(d_view);
/* Mark Device copy as modified */
modified_flags(1) = modified_flags(1) + 1;
if (sizeMismatch) {
::Kokkos::resize(d_view, n0, n1, n2, n3, n4, n5, n6, n7);
h_view = create_mirror_view(d_view);
/* Mark Device copy as modified */
modified_flags(1) = modified_flags(1) + 1;
}
} else {
/* Realloc on Device */
::Kokkos::realloc(d_view, n0, n1, n2, n3, n4, n5, n6, n7);
const bool sizeMismatch =
(h_view.extent(0) != n0) || (h_view.extent(1) != n1) ||
(h_view.extent(2) != n2) || (h_view.extent(3) != n3) ||
(h_view.extent(4) != n4) || (h_view.extent(5) != n5) ||
(h_view.extent(6) != n6) || (h_view.extent(7) != n7);
if (sizeMismatch)
if (sizeMismatch) {
::Kokkos::resize(h_view, n0, n1, n2, n3, n4, n5, n6, n7);
d_view = create_mirror_view(typename t_dev::execution_space(), h_view);
t_host temp_view = create_mirror_view(d_view);
/* Remap on Host */
Kokkos::deep_copy(temp_view, h_view);
h_view = temp_view;
d_view = create_mirror_view(typename t_dev::execution_space(), h_view);
/* Mark Host copy as modified */
modified_flags(0) = modified_flags(0) + 1;
/* Mark Host copy as modified */
modified_flags(0) = modified_flags(0) + 1;
}
}
}

View File

@ -1140,7 +1140,8 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
// to avoid incomplete type errors from using Kokkos::Cuda directly.
if (std::is_same<Kokkos::CudaUVMSpace,
typename traits::device_type::memory_space>::value) {
typename traits::device_type::memory_space::execution_space().fence();
typename traits::device_type::memory_space::execution_space().fence(
"Kokkos::DynRankView<>::DynRankView: fence before UVM allocation");
}
#endif
//------------------------------------------------------------
@ -1154,7 +1155,8 @@ class DynRankView : public ViewTraits<DataType, Properties...> {
#if defined(KOKKOS_ENABLE_CUDA)
if (std::is_same<Kokkos::CudaUVMSpace,
typename traits::device_type::memory_space>::value) {
typename traits::device_type::memory_space::execution_space().fence();
typename traits::device_type::memory_space::execution_space().fence(
"Kokkos::DynRankView<>::DynRankView: fence after UVM allocation");
}
#endif
//------------------------------------------------------------
@ -1404,7 +1406,7 @@ class ViewMapping<
template <class MemoryTraits>
struct apply {
static_assert(Kokkos::Impl::is_memory_traits<MemoryTraits>::value, "");
static_assert(Kokkos::is_memory_traits<MemoryTraits>::value, "");
using traits_type =
Kokkos::ViewTraits<data_type, array_layout,
@ -1574,7 +1576,7 @@ KOKKOS_INLINE_FUNCTION bool operator!=(const DynRankView<LT, LP...>& lhs,
namespace Kokkos {
namespace Impl {
template <class OutputView, typename Enable = void>
template <class OutputView, class Enable = void>
struct DynRankViewFill {
using const_value_type = typename OutputView::traits::const_value_type;
@ -1693,9 +1695,11 @@ inline void deep_copy(
typename ViewTraits<DT, DP...>::value_type>::value,
"deep_copy requires non-const type");
Kokkos::fence();
Kokkos::fence(
"Kokkos::deep_copy(DynRankView, value_type): fence before filling view");
Kokkos::Impl::DynRankViewFill<DynRankView<DT, DP...> >(dst, value);
Kokkos::fence();
Kokkos::fence(
"Kokkos::deep_copy(DynRankView, value_type): fence after filling view");
}
/** \brief Deep copy into a value in Host memory from a view. */
@ -1711,10 +1715,13 @@ inline void deep_copy(
using src_traits = ViewTraits<ST, SP...>;
using src_memory_space = typename src_traits::memory_space;
Kokkos::fence();
Kokkos::fence(
"Kokkos::deep_copy(value_type, DynRankView): fence before copying "
"value");
Kokkos::Impl::DeepCopy<HostSpace, src_memory_space>(&dst, src.data(),
sizeof(ST));
Kokkos::fence();
Kokkos::fence(
"Kokkos::deep_copy(value_type, DynRankView): fence after copying value");
}
//----------------------------------------------------------------------------
@ -1744,14 +1751,14 @@ inline void deep_copy(
enum {
DstExecCanAccessSrc =
Kokkos::Impl::SpaceAccessibility<dst_execution_space,
src_memory_space>::accessible
Kokkos::SpaceAccessibility<dst_execution_space,
src_memory_space>::accessible
};
enum {
SrcExecCanAccessDst =
Kokkos::Impl::SpaceAccessibility<src_execution_space,
dst_memory_space>::accessible
Kokkos::SpaceAccessibility<src_execution_space,
dst_memory_space>::accessible
};
if ((void*)dst.data() != (void*)src.data()) {
@ -1762,10 +1769,14 @@ inline void deep_copy(
// memory then can byte-wise copy
if (rank(src) == 0 && rank(dst) == 0) {
using value_type = typename dst_type::value_type;
Kokkos::fence();
Kokkos::fence(
"Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence before "
"copying rank-0 views");
Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
dst.data(), src.data(), sizeof(value_type));
Kokkos::fence();
Kokkos::fence(
"Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence after "
"copying rank-0 views");
} else if (std::is_same<
typename DstType::traits::value_type,
typename SrcType::traits::non_const_value_type>::value &&
@ -1787,10 +1798,14 @@ inline void deep_copy(
dst.extent(6) == src.extent(6) &&
dst.extent(7) == src.extent(7)) {
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span();
Kokkos::fence();
Kokkos::fence(
"Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence before "
"copying rank-1 views");
Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
dst.data(), src.data(), nbytes);
Kokkos::fence();
Kokkos::fence(
"Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence after "
"copying rank-1 views");
} else if (std::is_same<
typename DstType::traits::value_type,
typename SrcType::traits::non_const_value_type>::value &&
@ -1817,29 +1832,43 @@ inline void deep_copy(
dst.stride_6() == src.stride_6() &&
dst.stride_7() == src.stride_7()) {
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span();
Kokkos::fence();
Kokkos::fence(
"Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence before "
"copying rank-1 views");
Kokkos::Impl::DeepCopy<dst_memory_space, src_memory_space>(
dst.data(), src.data(), nbytes);
Kokkos::fence();
Kokkos::fence(
"Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence after "
"copying rank-1 views");
} else if (DstExecCanAccessSrc) {
// Copying data between views in accessible memory spaces and either
// non-contiguous or incompatible shape.
Kokkos::fence();
Kokkos::fence(
"Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence before "
"remapping views of incompatible shape");
Kokkos::Impl::DynRankViewRemap<dst_type, src_type>(dst, src);
Kokkos::fence();
Kokkos::fence(
"Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence after "
"remapping views of incompatible shape");
} else if (SrcExecCanAccessDst) {
// Copying data between views in accessible memory spaces and either
// non-contiguous or incompatible shape.
Kokkos::fence();
Kokkos::fence(
"Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence before "
"remapping views of incompatible shape");
Kokkos::Impl::DynRankViewRemap<dst_type, src_type, src_execution_space>(
dst, src);
Kokkos::fence();
Kokkos::fence(
"Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence after "
"remapping views of incompatible shape");
} else {
Kokkos::Impl::throw_runtime_exception(
"deep_copy given views that would require a temporary allocation");
}
} else {
Kokkos::fence();
Kokkos::fence(
"Kokkos::Impl::DeepCopy(DynRankView, DynRankView): fence due to same "
"src and dst");
}
}

View File

@ -53,36 +53,203 @@
namespace Kokkos {
namespace Experimental {
// Simple metafunction for choosing the memory space.
// In the current implementation, if memory_space == CudaSpace,
// use CudaUVMSpace for the chunk 'array' allocation, which
// will contain pointers to chunks of memory allocated
// in CudaSpace.
namespace Impl {
template <class MemSpace>
struct ChunkArraySpace {
using memory_space = MemSpace;
/// Utility class to manage memory for chunked arrays on the host and
/// device. Allocates/deallocates memory on both the host and device along with
/// providing utilities for creating mirrors and deep copying between them.
///
/// The managed object is an array of `m_chunk_max + 2` chunk pointers: the
/// first `m_chunk_max` entries point at chunks, and the two trailing entries
/// are reused as counters (see allocate_with_destroy() and Destroy::execute()).
///
/// \tparam MemorySpace memory space used for the chunk-pointer array.
/// \tparam ValueType   element type stored in the chunks.
template <typename MemorySpace, typename ValueType>
struct ChunkedArrayManager {
using value_type = ValueType;
using pointer_type = ValueType*;
using track_type = Kokkos::Impl::SharedAllocationTracker;
// Default construction leaves the manager invalid: m_chunks is nullptr,
// m_valid is false and both sizes are 0 (see the member initializers).
ChunkedArrayManager() = default;
ChunkedArrayManager(ChunkedArrayManager const&) = default;
ChunkedArrayManager(ChunkedArrayManager&&) = default;
ChunkedArrayManager& operator=(ChunkedArrayManager&&) = default;
ChunkedArrayManager& operator=(const ChunkedArrayManager&) = default;
// Every instantiation is a friend of every other one, so the converting
// constructor and create_mirror() can reach the private members and private
// constructors of a manager that lives in a different memory space.
template <typename Space, typename Value>
friend struct ChunkedArrayManager;
// Converting copy constructor: copies all state (including the tracker) from
// a manager of another space/value type. The chunk pointer is reinterpreted
// with a C-style cast; the static_assert rejects the conversion unless
// MemorySpaceAccess<MemorySpace, Space>::assignable holds.
template <typename Space, typename Value>
inline ChunkedArrayManager(const ChunkedArrayManager<Space, Value>& rhs)
: m_valid(rhs.m_valid),
m_chunk_max(rhs.m_chunk_max),
m_chunks((ValueType**)(rhs.m_chunks)),
m_track(rhs.m_track),
m_chunk_size(rhs.m_chunk_size) {
static_assert(
Kokkos::Impl::MemorySpaceAccess<MemorySpace, Space>::assignable,
"Incompatible ChunkedArrayManager copy construction");
}
// Records the sizes only; nothing is allocated and the manager stays invalid
// until one of the allocate_* functions and initialize() have been called.
ChunkedArrayManager(const unsigned arg_chunk_max,
const unsigned arg_chunk_size)
: m_chunk_max(arg_chunk_max), m_chunk_size(arg_chunk_size) {}
private:
// Tag types selecting the private constructors used by create_mirror().
struct ACCESSIBLE_TAG {};
struct INACCESSIBLE_TAG {};
// Wraps an already existing chunk-pointer array and marks the manager valid.
// m_chunk_size keeps its default of 0 and m_track stays default-constructed.
ChunkedArrayManager(ACCESSIBLE_TAG, pointer_type* arg_chunks,
const unsigned arg_chunk_max)
: m_valid(true), m_chunk_max(arg_chunk_max), m_chunks(arg_chunks) {}
// Carries over the sizes only; the chunk-pointer array is still to be
// allocated, so the manager remains invalid.
ChunkedArrayManager(INACCESSIBLE_TAG, const unsigned arg_chunk_max,
const unsigned arg_chunk_size)
: m_chunk_max(arg_chunk_max), m_chunk_size(arg_chunk_size) {}
public:
// Trait: std::true_type when
// Kokkos::Impl::MemorySpaceAccess<MemorySpace, Space>::accessible is true,
// std::false_type otherwise.
template <typename Space, typename Enable_ = void>
struct IsAccessibleFrom;
template <typename Space>
struct IsAccessibleFrom<
Space, typename std::enable_if_t<Kokkos::Impl::MemorySpaceAccess<
MemorySpace, Space>::accessible>> : std::true_type {};
template <typename Space>
struct IsAccessibleFrom<
Space, typename std::enable_if_t<!Kokkos::Impl::MemorySpaceAccess<
MemorySpace, Space>::accessible>> : std::false_type {};
// Mirror creation, accessible case: the returned manager shares other's
// chunk-pointer array (no allocation, no copy).
// NOTE(review): ACCESSIBLE_TAG names the tag type of *this* instantiation,
// while the object being constructed is ChunkedArrayManager<Space, ValueType>;
// the two tag types coincide only when Space == MemorySpace - confirm that no
// other combination reaches this overload.
template <typename Space>
static ChunkedArrayManager<Space, ValueType> create_mirror(
ChunkedArrayManager<MemorySpace, ValueType> const& other,
typename std::enable_if<IsAccessibleFrom<Space>::value>::type* =
nullptr) {
return ChunkedArrayManager<Space, ValueType>{
ACCESSIBLE_TAG{}, other.m_chunks, other.m_chunk_max};
}
// Mirror creation, inaccessible case: the returned manager only inherits the
// chunk count and chunk size; it owns no chunk-pointer array yet.
template <typename Space>
static ChunkedArrayManager<Space, ValueType> create_mirror(
ChunkedArrayManager<MemorySpace, ValueType> const& other,
typename std::enable_if<!IsAccessibleFrom<Space>::value>::type* =
nullptr) {
using tag_type =
typename ChunkedArrayManager<Space, ValueType>::INACCESSIBLE_TAG;
return ChunkedArrayManager<Space, ValueType>{tag_type{}, other.m_chunk_max,
other.m_chunk_size};
}
public:
// Allocates the chunk-pointer array (m_chunk_max + 2 pointers) directly from
// MemorySpace if it has not been allocated yet. This allocation is not
// attached to m_track.
// NOTE(review): presumably released through the `linked_allocation` argument
// of another manager's allocate_with_destroy() - confirm against the caller.
void allocate_device(const std::string& label) {
if (m_chunks == nullptr) {
m_chunks = reinterpret_cast<pointer_type*>(MemorySpace().allocate(
label.c_str(), (sizeof(pointer_type) * (m_chunk_max + 2))));
}
}
// Sets every slot (chunk pointers and the two trailing counter slots) to
// nullptr and marks the manager valid. Writes through m_chunks directly, so
// this assumes m_chunks is non-null and addressable from the calling code -
// TODO confirm for non-host memory spaces.
void initialize() {
for (unsigned i = 0; i < m_chunk_max + 2; i++) {
m_chunks[i] = nullptr;
}
m_valid = true;
}
private:
/// Custom destroy functor for deallocating array chunks along with a linked
/// allocation
///
/// \tparam Space memory space whose deallocate() is used for the chunks and
///         for the linked allocation.
template <typename Space>
struct Destroy {
Destroy() = default;
Destroy(Destroy&&) = default;
Destroy(const Destroy&) = default;
Destroy& operator=(Destroy&&) = default;
Destroy& operator=(const Destroy&) = default;
Destroy(std::string label, value_type** arg_chunk,
const unsigned arg_chunk_max, const unsigned arg_chunk_size,
value_type** arg_linked)
: m_label(label),
m_chunks(arg_chunk),
m_linked(arg_linked),
m_chunk_max(arg_chunk_max),
m_chunk_size(arg_chunk_size) {}
// Frees every chunk that is currently allocated, then the linked pointer
// array (if any). The chunk-pointer array m_chunks itself is not freed here.
void execute() {
// Destroy the array of chunk pointers.
// Two entries beyond the max chunks are allocation counters.
// The slot at index m_chunk_max is read as the number of allocated chunks.
uintptr_t const len =
*reinterpret_cast<uintptr_t*>(m_chunks + m_chunk_max);
for (unsigned i = 0; i < len; i++) {
Space().deallocate(m_label.c_str(), m_chunks[i],
sizeof(value_type) * m_chunk_size);
}
// Destroy the linked allocation if we have one.
if (m_linked != nullptr) {
Space().deallocate(m_label.c_str(), m_linked,
(sizeof(value_type*) * (m_chunk_max + 2)));
}
}
void destroy_shared_allocation() { execute(); }
std::string m_label;
value_type** m_chunks = nullptr;
value_type** m_linked = nullptr;
// NOTE(review): the two sizes below have no default member initializer, so a
// default-constructed Destroy leaves them indeterminate.
unsigned m_chunk_max;
unsigned m_chunk_size;
};
public:
// Allocates the chunk-pointer array as a tracked SharedAllocationRecord in
// MemorySpace and installs a Destroy<Space> functor on that record. The
// functor is given `linked_allocation` as the extra pointer array to release
// together with the chunks.
template <typename Space>
void allocate_with_destroy(const std::string& label,
pointer_type* linked_allocation = nullptr) {
using destroy_type = Destroy<Space>;
using record_type =
Kokkos::Impl::SharedAllocationRecord<MemorySpace, destroy_type>;
// Allocate 2 extra slots beyond m_chunk_max: slot [m_chunk_max] holds the
// number of allocated chunks and slot [m_chunk_max + 1] holds the extent.
// This layout must match what Destroy's execute() method reads.
record_type* const record = record_type::allocate(
MemorySpace(), label, (sizeof(pointer_type) * (m_chunk_max + 2)));
m_chunks = static_cast<pointer_type*>(record->data());
m_track.assign_allocated_record_to_uninitialized(record);
record->m_destroy = destroy_type(label, m_chunks, m_chunk_max, m_chunk_size,
linked_allocation);
}
// Raw pointer to the chunk-pointer array (nullptr before allocation).
pointer_type* get_ptr() const { return m_chunks; }
// Inaccessible case: copies this manager's whole pointer array
// (m_chunk_max + 2 entries) into other's array via
// Kokkos::Impl::DeepCopy<Space, MemorySpace>.
template <typename Space>
typename std::enable_if<!IsAccessibleFrom<Space>::value>::type deep_copy_to(
ChunkedArrayManager<Space, ValueType> const& other) {
Kokkos::Impl::DeepCopy<Space, MemorySpace>(
other.m_chunks, m_chunks, sizeof(pointer_type) * (m_chunk_max + 2));
}
// Accessible case: nothing is copied.
template <typename Space>
typename std::enable_if<IsAccessibleFrom<Space>::value>::type deep_copy_to(
ChunkedArrayManager<Space, ValueType> const&) {
// no-op
}
// Pointer arithmetic on the chunk-pointer array: address of slot i.
KOKKOS_INLINE_FUNCTION
pointer_type* operator+(int i) const { return m_chunks + i; }
// Reference to slot i of the chunk-pointer array (no bounds checking).
KOKKOS_INLINE_FUNCTION
pointer_type& operator[](int i) const { return m_chunks[i]; }
// Tracker of the record created by allocate_with_destroy(), if any.
track_type const& track() const { return m_track; }
// True once initialize() has run or the manager wraps an accessible array.
KOKKOS_INLINE_FUNCTION
bool valid() const { return m_valid; }
private:
bool m_valid = false;              // set by initialize() / ACCESSIBLE_TAG ctor
unsigned m_chunk_max = 0;          // number of chunk slots (excluding the 2 extra)
pointer_type* m_chunks = nullptr;  // array of m_chunk_max + 2 chunk pointers
track_type m_track;                // reference-counted allocation tracker
unsigned m_chunk_size = 0;         // elements per chunk
};
#ifdef KOKKOS_ENABLE_CUDA
template <>
struct ChunkArraySpace<Kokkos::CudaSpace> {
using memory_space = typename Kokkos::CudaUVMSpace;
};
#endif
#ifdef KOKKOS_ENABLE_HIP
template <>
struct ChunkArraySpace<Kokkos::Experimental::HIPSpace> {
using memory_space = typename Kokkos::Experimental::HIPHostPinnedSpace;
};
#endif
#ifdef KOKKOS_ENABLE_SYCL
template <>
struct ChunkArraySpace<Kokkos::Experimental::SYCLDeviceUSMSpace> {
using memory_space = typename Kokkos::Experimental::SYCLSharedUSMSpace;
};
#endif
} // end namespace Impl
} /* end namespace Impl */
/** \brief Dynamic views are restricted to rank-one and no layout.
* Resize only occurs on host outside of parallel_regions.
@ -93,6 +260,13 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
public:
using traits = Kokkos::ViewTraits<DataType, P...>;
using value_type = typename traits::value_type;
using device_space = typename traits::memory_space;
using host_space =
typename Kokkos::Impl::HostMirror<device_space>::Space::memory_space;
using device_accessor = Impl::ChunkedArrayManager<device_space, value_type>;
using host_accessor = Impl::ChunkedArrayManager<host_space, value_type>;
private:
template <class, class...>
friend class DynamicView;
@ -108,7 +282,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
"DynamicView only implemented for non-specialized View type");
template <class Space, bool = Kokkos::Impl::MemorySpaceAccess<
Space, typename traits::memory_space>::accessible>
Space, device_space>::accessible>
struct verify_space {
KOKKOS_FORCEINLINE_FUNCTION static void check() {}
};
@ -123,9 +297,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
};
private:
track_type m_track;
typename traits::value_type** m_chunks =
nullptr; // array of pointers to 'chunks' of memory
device_accessor m_chunks;
host_accessor m_chunks_host;
unsigned m_chunk_shift; // ceil(log2(m_chunk_size))
unsigned m_chunk_mask; // m_chunk_size - 1
unsigned m_chunk_max; // number of entries in the chunk array - each pointing
@ -173,7 +346,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
KOKKOS_INLINE_FUNCTION
size_t allocation_extent() const noexcept {
uintptr_t n = *reinterpret_cast<const uintptr_t*>(m_chunks + m_chunk_max);
uintptr_t n =
*reinterpret_cast<const uintptr_t*>(m_chunks_host + m_chunk_max);
return (n << m_chunk_shift);
}
@ -183,7 +357,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
KOKKOS_INLINE_FUNCTION
size_t size() const noexcept {
size_t extent_0 =
*reinterpret_cast<const size_t*>(m_chunks + m_chunk_max + 1);
*reinterpret_cast<const size_t*>(m_chunks_host + m_chunk_max + 1);
return extent_0;
}
@ -215,10 +389,10 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
// Allocation tracking properties
KOKKOS_INLINE_FUNCTION
int use_count() const { return m_track.use_count(); }
int use_count() const { return m_chunks_host.track().use_count(); }
inline const std::string label() const {
return m_track.template get_label<typename traits::memory_space>();
return m_chunks_host.track().template get_label<host_space>();
}
//----------------------------------------------------------------------
@ -285,13 +459,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
* up to the maximum number of chunks
* */
template <typename IntType>
inline typename std::enable_if<
std::is_integral<IntType>::value &&
Kokkos::Impl::MemorySpaceAccess<
Kokkos::HostSpace,
typename Impl::ChunkArraySpace<
typename traits::memory_space>::memory_space>::accessible>::type
resize_serial(IntType const& n) {
inline void resize_serial(IntType const& n) {
using local_value_type = typename traits::value_type;
using value_pointer_type = local_value_type*;
@ -304,37 +472,40 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
}
// *m_chunks[m_chunk_max] stores the current number of chunks being used
uintptr_t* const pc = reinterpret_cast<uintptr_t*>(m_chunks + m_chunk_max);
std::string _label =
m_track.template get_label<typename traits::memory_space>();
uintptr_t* const pc =
reinterpret_cast<uintptr_t*>(m_chunks_host + m_chunk_max);
std::string _label = m_chunks_host.track().template get_label<host_space>();
if (*pc < NC) {
while (*pc < NC) {
m_chunks[*pc] = reinterpret_cast<value_pointer_type>(
typename traits::memory_space().allocate(
m_chunks_host[*pc] =
reinterpret_cast<value_pointer_type>(device_space().allocate(
_label.c_str(), sizeof(local_value_type) << m_chunk_shift));
++*pc;
}
} else {
while (NC + 1 <= *pc) {
--*pc;
typename traits::memory_space().deallocate(
_label.c_str(), m_chunks[*pc],
sizeof(local_value_type) << m_chunk_shift);
m_chunks[*pc] = nullptr;
device_space().deallocate(_label.c_str(), m_chunks_host[*pc],
sizeof(local_value_type) << m_chunk_shift);
m_chunks_host[*pc] = nullptr;
}
}
// *m_chunks[m_chunk_max+1] stores the 'extent' requested by resize
// *m_chunks_host[m_chunk_max+1] stores the 'extent' requested by resize
*(pc + 1) = n;
m_chunks_host.deep_copy_to(m_chunks);
}
KOKKOS_INLINE_FUNCTION bool is_allocated() const {
if (m_chunks == nullptr) {
return false;
} else {
// *m_chunks[m_chunk_max] stores the current number of chunks being used
if (m_chunks_host.valid()) {
// *m_chunks_host[m_chunk_max] stores the current number of chunks being
// used
uintptr_t* const pc =
reinterpret_cast<uintptr_t*>(m_chunks + m_chunk_max);
reinterpret_cast<uintptr_t*>(m_chunks_host + m_chunk_max);
return (*(pc + 1) > 0);
} else {
return false;
}
}
@ -349,8 +520,8 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
template <class RT, class... RP>
DynamicView(const DynamicView<RT, RP...>& rhs)
: m_track(rhs.m_track),
m_chunks((typename traits::value_type**)rhs.m_chunks),
: m_chunks(rhs.m_chunks),
m_chunks_host(rhs.m_chunks_host),
m_chunk_shift(rhs.m_chunk_shift),
m_chunk_mask(rhs.m_chunk_mask),
m_chunk_max(rhs.m_chunk_max),
@ -361,63 +532,6 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
"Incompatible DynamicView copy construction");
}
//----------------------------------------------------------------------
struct Destroy {
using local_value_type = typename traits::value_type;
std::string m_label;
local_value_type** m_chunks;
unsigned m_chunk_max;
bool m_destroy;
unsigned m_chunk_size;
// Initialize or destroy array of chunk pointers.
// Two entries beyond the max chunks are allocation counters.
inline void operator()(unsigned i) const {
if (m_destroy && i < m_chunk_max && nullptr != m_chunks[i]) {
typename traits::memory_space().deallocate(
m_label.c_str(), m_chunks[i],
sizeof(local_value_type) * m_chunk_size);
}
m_chunks[i] = nullptr;
}
void execute(bool arg_destroy) {
using Range = Kokkos::RangePolicy<typename HostSpace::execution_space>;
m_destroy = arg_destroy;
Kokkos::Impl::ParallelFor<Destroy, Range> closure(
*this,
Range(0, m_chunk_max + 2)); // Add 2 to 'destroy' extra slots storing
// num_chunks and extent; previously + 1
closure.execute();
typename traits::execution_space().fence();
// Impl::ChunkArraySpace< typename traits::memory_space
// >::memory_space::execution_space().fence();
}
void construct_shared_allocation() { execute(false); }
void destroy_shared_allocation() { execute(true); }
Destroy() = default;
Destroy(Destroy&&) = default;
Destroy(const Destroy&) = default;
Destroy& operator=(Destroy&&) = default;
Destroy& operator=(const Destroy&) = default;
Destroy(std::string label, typename traits::value_type** arg_chunk,
const unsigned arg_chunk_max, const unsigned arg_chunk_size)
: m_label(label),
m_chunks(arg_chunk),
m_chunk_max(arg_chunk_max),
m_destroy(false),
m_chunk_size(arg_chunk_size) {}
};
/**\brief Allocation constructor
*
* Memory is allocated in chunks
@ -427,10 +541,7 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
explicit inline DynamicView(const std::string& arg_label,
const unsigned min_chunk_size,
const unsigned max_extent)
: m_track(),
m_chunks(nullptr)
// The chunk size is guaranteed to be a power of two
,
: // The chunk size is guaranteed to be a power of two
m_chunk_shift(Kokkos::Impl::integral_power_of_two_that_contains(
min_chunk_size)) // div ceil(log2(min_chunk_size))
,
@ -440,28 +551,22 @@ class DynamicView : public Kokkos::ViewTraits<DataType, P...> {
m_chunk_shift) // max num pointers-to-chunks in array
,
m_chunk_size(2 << (m_chunk_shift - 1)) {
using chunk_array_memory_space = typename Impl::ChunkArraySpace<
typename traits::memory_space>::memory_space;
// A functor to deallocate all of the chunks upon final destruction
using record_type =
Kokkos::Impl::SharedAllocationRecord<chunk_array_memory_space, Destroy>;
m_chunks = device_accessor(m_chunk_max, m_chunk_size);
// Allocate chunk pointers and allocation counter
record_type* const record =
record_type::allocate(chunk_array_memory_space(), arg_label,
(sizeof(pointer_type) * (m_chunk_max + 2)));
// Allocate + 2 extra slots so that *m_chunks[m_chunk_max] ==
// num_chunks_alloc and *m_chunks[m_chunk_max+1] == extent. This must match
// the range used in Destroy's execute(...) method.
m_chunks = reinterpret_cast<pointer_type*>(record->data());
record->m_destroy = Destroy(arg_label, m_chunks, m_chunk_max, m_chunk_size);
// Initialize to zero
record->m_destroy.construct_shared_allocation();
m_track.assign_allocated_record_to_uninitialized(record);
if (device_accessor::template IsAccessibleFrom<host_space>::value) {
m_chunks.template allocate_with_destroy<device_space>(arg_label);
m_chunks.initialize();
m_chunks_host =
device_accessor::template create_mirror<host_space>(m_chunks);
} else {
m_chunks.allocate_device(arg_label);
m_chunks_host =
device_accessor::template create_mirror<host_space>(m_chunks);
m_chunks_host.template allocate_with_destroy<device_space>(
arg_label, m_chunks.get_ptr());
m_chunks_host.initialize();
m_chunks_host.deep_copy_to(m_chunks);
}
}
};
@ -487,8 +592,8 @@ inline void deep_copy(const View<T, DP...>& dst,
enum {
DstExecCanAccessSrc =
Kokkos::Impl::SpaceAccessibility<dst_execution_space,
src_memory_space>::accessible
Kokkos::SpaceAccessibility<dst_execution_space,
src_memory_space>::accessible
};
if (DstExecCanAccessSrc) {
@ -512,8 +617,8 @@ inline void deep_copy(const Kokkos::Experimental::DynamicView<T, DP...>& dst,
enum {
DstExecCanAccessSrc =
Kokkos::Impl::SpaceAccessibility<dst_execution_space,
src_memory_space>::accessible
Kokkos::SpaceAccessibility<dst_execution_space,
src_memory_space>::accessible
};
if (DstExecCanAccessSrc) {

View File

@ -187,7 +187,8 @@ template <typename ReportType, typename DeviceType>
// Resizes both backing containers of the reporter (m_reports and
// m_reporters) to new_size, then fences DeviceType's execution space before
// returning.
// NOTE(review): the unlabeled fence() and the labeled fence(...) below look
// like the before/after lines of the same diff hunk; confirm that only the
// labeled call is intended to remain.
void ErrorReporter<ReportType, DeviceType>::resize(const size_t new_size) {
m_reports.resize(new_size);
m_reporters.resize(new_size);
typename DeviceType::execution_space().fence();
typename DeviceType::execution_space().fence(
"Kokkos::Experimental::ErrorReporter::resize: fence after resizing");
}
} // namespace Experimental

View File

@ -116,8 +116,7 @@ KOKKOS_INLINE_FUNCTION void offsetview_verify_operator_bounds(
This check should cover the case of Views that don't
have the Unmanaged trait but were initialized by pointer. */
if (tracker.has_record()) {
Kokkos::Impl::operator_bounds_error_on_device<MapType>(
map, Kokkos::Impl::has_printable_label_typedef<MapType>());
Kokkos::Impl::operator_bounds_error_on_device(map);
} else {
Kokkos::abort("OffsetView bounds error");
}
@ -1244,7 +1243,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
// to avoid incomplete type errors from using Kokkos::Cuda directly.
if (std::is_same<Kokkos::CudaUVMSpace,
typename traits::device_type::memory_space>::value) {
typename traits::device_type::memory_space::execution_space().fence();
typename traits::device_type::memory_space::execution_space().fence(
"Kokkos::OffsetView::OffsetView(): fence before UVM allocation");
}
#endif
//------------------------------------------------------------
@ -1256,7 +1256,8 @@ class OffsetView : public ViewTraits<DataType, Properties...> {
#if defined(KOKKOS_ENABLE_CUDA)
if (std::is_same<Kokkos::CudaUVMSpace,
typename traits::device_type::memory_space>::value) {
typename traits::device_type::memory_space::execution_space().fence();
typename traits::device_type::memory_space::execution_space().fence(
"Kokkos::OffsetView::OffsetView(): fence after UVM allocation");
}
#endif
//------------------------------------------------------------

View File

@ -834,7 +834,7 @@ class ScatterView<DataType, Layout, DeviceType, Op, ScatterNonDuplicated,
static_assert(std::is_same<typename dest_type::array_layout, Layout>::value,
"ScatterView contribute destination has different layout");
static_assert(
Kokkos::Impl::SpaceAccessibility<
Kokkos::SpaceAccessibility<
execution_space, typename dest_type::memory_space>::accessible,
"ScatterView contribute destination memory space not accessible");
if (dest.data() == internal_view.data()) return;
@ -1061,7 +1061,7 @@ class ScatterView<DataType, Kokkos::LayoutRight, DeviceType, Op,
Kokkos::LayoutRight>::value,
"ScatterView deep_copy destination has different layout");
static_assert(
Kokkos::Impl::SpaceAccessibility<
Kokkos::SpaceAccessibility<
execution_space, typename dest_type::memory_space>::accessible,
"ScatterView deep_copy destination memory space not accessible");
bool is_equal = (dest.data() == internal_view.data());
@ -1290,7 +1290,7 @@ class ScatterView<DataType, Kokkos::LayoutLeft, DeviceType, Op,
Kokkos::LayoutLeft>::value,
"ScatterView deep_copy destination has different layout");
static_assert(
Kokkos::Impl::SpaceAccessibility<
Kokkos::SpaceAccessibility<
execution_space, typename dest_type::memory_space>::accessible,
"ScatterView deep_copy destination memory space not accessible");
auto extent = internal_view.extent(internal_view_type::rank - 1);

View File

@ -405,7 +405,9 @@ class StaticCrsGraph {
Kokkos::parallel_for("Kokkos::StaticCrsGraph::create_block_partitioning",
Kokkos::RangePolicy<execution_space>(0, numRows()),
partitioner);
typename device_type::execution_space().fence();
typename device_type::execution_space().fence(
"Kokkos::StaticCrsGraph::create_block_partitioning:: fence after "
"partition");
row_block_offsets = block_offsets;
}

View File

@ -345,7 +345,8 @@ class UnorderedMap {
const impl_value_type tmp = impl_value_type();
Kokkos::deep_copy(m_values, tmp);
}
{ Kokkos::deep_copy(m_scalars, 0); }
Kokkos::deep_copy(m_scalars, 0);
m_size = 0;
}
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
@ -393,9 +394,9 @@ class UnorderedMap {
///
/// This method has undefined behavior when erasable() is true.
///
/// Note that this is not a device function; it cannot be called in
/// Note that this is <i>not</i> a device function; it cannot be called in
/// a parallel kernel. The value is not stored as a variable; it
/// must be computed.
/// must be computed. m_size is a mutable cache of that value.
size_type size() const {
if (capacity() == 0u) return 0u;
if (modified()) {
@ -419,9 +420,13 @@ class UnorderedMap {
// Switches the map into its erasable state.
// Returns true when the map was not already erasable at the time of the call
// (result = !erasable()); the flag is only actually set when the map type is
// insertable (is_insertable_map) and that result is true.
// NOTE(review): each unlabeled fence() is directly followed by a labeled
// fence(...) with the same purpose; these look like the before/after lines of
// one diff hunk, so confirm only the labeled calls are intended to remain.
bool begin_erase() {
bool result = !erasable();
if (is_insertable_map && result) {
// Fence the execution space before touching the flag.
execution_space().fence();
execution_space().fence(
"Kokkos::UnorderedMap::begin_erase: fence before setting erasable "
"flag");
set_flag(erasable_idx);
// Fence again once the flag has been set.
execution_space().fence();
execution_space().fence(
"Kokkos::UnorderedMap::begin_erase: fence after setting erasable "
"flag");
}
return result;
}
@ -429,10 +434,12 @@ class UnorderedMap {
// Leaves the erasable state.
// Returns true when the map was erasable at the time of the call
// (result = erasable()). When the map type is insertable and that result is
// true, the Impl::UnorderedMapErase functor is applied to this map between
// two fences, and the erasable flag is reset afterwards.
// NOTE(review): each unlabeled fence() is directly followed by a labeled
// fence(...) with the same purpose; these look like the before/after lines of
// one diff hunk, so confirm only the labeled calls are intended to remain.
bool end_erase() {
bool result = erasable();
if (is_insertable_map && result) {
// Fence the execution space before running the erase functor.
execution_space().fence();
execution_space().fence(
"Kokkos::UnorderedMap::end_erase: fence before erasing");
Impl::UnorderedMapErase<declared_map_type> f(*this);
f.apply();
// Fence again before clearing the erasable flag.
execution_space().fence();
execution_space().fence(
"Kokkos::UnorderedMap::end_erase: fence after erasing");
reset_flag(erasable_idx);
}
return result;
}

View File

@ -119,12 +119,14 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
if (DV::template need_sync<typename DV::t_dev::device_type>()) {
set_functor_host f(DV::h_view, val);
parallel_for("Kokkos::vector::assign", n, f);
typename DV::t_host::execution_space().fence();
typename DV::t_host::execution_space().fence(
"Kokkos::vector::assign: fence after assigning values");
DV::template modify<typename DV::t_host::device_type>();
} else {
set_functor f(DV::d_view, val);
parallel_for("Kokkos::vector::assign", n, f);
typename DV::t_dev::execution_space().fence();
typename DV::t_dev::execution_space().fence(
"Kokkos::vector::assign: fence after assigning values");
DV::template modify<typename DV::t_dev::device_type>();
}
}

View File

@ -57,22 +57,10 @@
namespace Kokkos {
namespace Impl {
// Rotates the bits of `i` to the left by `r` positions and returns the
// result. A rotation count of zero returns `i` unchanged.
// NOTE(review): presumably callers pass 0 <= r < bit width of unsigned;
// other counts are not handled here - confirm against call sites.
KOKKOS_FORCEINLINE_FUNCTION
unsigned rotate_left(unsigned i, int r) {
  constexpr int bit_count = static_cast<int>(sizeof(unsigned) * CHAR_BIT);
  // Zero rotation is handled separately so the complementary shift below is
  // never performed with a count equal to the full bit width.
  if (r == 0) {
    return i;
  }
  const unsigned shifted_up   = i << r;
  const unsigned wrapped_down = i >> (bit_count - r);
  return shifted_up | wrapped_down;
}
// Rotates the bits of `i` to the right by `r` positions and returns the
// result. A rotation count of zero returns `i` unchanged on every code path.
// NOTE(review): presumably callers pass 0 <= r < bit width of unsigned;
// other counts are not handled here - confirm against call sites.
KOKKOS_FORCEINLINE_FUNCTION
unsigned rotate_right(unsigned i, int r) {
  constexpr int size = static_cast<int>(sizeof(unsigned) * CHAR_BIT);
  // FIXME_SYCL llvm.fshr.i32 missing
  // (https://github.com/intel/llvm/issues/3308)
#ifdef __SYCL_DEVICE_ONLY__
  // Express the right rotation as a left rotation by the complementary
  // count. r == 0 must be short-circuited: forwarding size - 0 == size would
  // make rotate_left shift by the full bit width, which is undefined.
  return r ? rotate_left(i, size - r) : i;
#else
  return r ? ((i >> r) | (i << (size - r))) : i;
#endif
}
template <typename Bitset>

View File

@ -75,7 +75,7 @@ uint32_t fmix32(uint32_t h) {
KOKKOS_INLINE_FUNCTION
uint32_t MurmurHash3_x86_32(const void* key, int len, uint32_t seed) {
const uint8_t* data = (const uint8_t*)key;
const uint8_t* data = static_cast<const uint8_t*>(key);
const int nblocks = len / 4;
uint32_t h1 = seed;

View File

@ -49,7 +49,7 @@
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
#include <Kokkos_DualView.hpp>
namespace Test {

View File

@ -702,6 +702,11 @@ class TestDynViewAPI {
using View0 = Kokkos::View<T, device>;
using View1 = Kokkos::View<T*, device>;
using View2 = Kokkos::View<T**, device>;
using View3 = Kokkos::View<T***, device>;
using View4 = Kokkos::View<T****, device>;
using View5 = Kokkos::View<T*****, device>;
using View6 = Kokkos::View<T******, device>;
using View7 = Kokkos::View<T*******, device>;
using host_view_space = typename View0::host_mirror_space;
@ -1065,7 +1070,7 @@ class TestDynViewAPI {
dView0 d_uninitialized(
Kokkos::view_alloc(Kokkos::WithoutInitializing, "uninit"), 10, 20);
ASSERT_TRUE(d_uninitialized.data() != nullptr);
ASSERT_NE(d_uninitialized.data(), nullptr);
ASSERT_EQ(d_uninitialized.rank(), 2);
ASSERT_EQ(d_uninitialized.extent(0), 10);
ASSERT_EQ(d_uninitialized.extent(1), 20);
@ -1075,14 +1080,14 @@ class TestDynViewAPI {
hView0 hx, hy, hz;
ASSERT_TRUE(Kokkos::is_dyn_rank_view<dView0>::value);
ASSERT_FALSE(Kokkos::is_dyn_rank_view<Kokkos::View<double> >::value);
ASSERT_FALSE(Kokkos::is_dyn_rank_view<Kokkos::View<double>>::value);
ASSERT_TRUE(dx.data() == nullptr); // Okay with UVM
ASSERT_TRUE(dy.data() == nullptr); // Okay with UVM
ASSERT_TRUE(dz.data() == nullptr); // Okay with UVM
ASSERT_TRUE(hx.data() == nullptr);
ASSERT_TRUE(hy.data() == nullptr);
ASSERT_TRUE(hz.data() == nullptr);
ASSERT_EQ(dx.data(), nullptr); // Okay with UVM
ASSERT_EQ(dy.data(), nullptr); // Okay with UVM
ASSERT_EQ(dz.data(), nullptr); // Okay with UVM
ASSERT_EQ(hx.data(), nullptr);
ASSERT_EQ(hy.data(), nullptr);
ASSERT_EQ(hz.data(), nullptr);
ASSERT_EQ(dx.extent(0), 0u); // Okay with UVM
ASSERT_EQ(dy.extent(0), 0u); // Okay with UVM
ASSERT_EQ(dz.extent(0), 0u); // Okay with UVM
@ -1153,11 +1158,11 @@ class TestDynViewAPI {
ASSERT_EQ(dx.use_count(), size_t(2));
ASSERT_FALSE(dx.data() == nullptr);
ASSERT_FALSE(const_dx.data() == nullptr);
ASSERT_FALSE(unmanaged_dx.data() == nullptr);
ASSERT_FALSE(unmanaged_from_ptr_dx.data() == nullptr);
ASSERT_FALSE(dy.data() == nullptr);
ASSERT_NE(dx.data(), nullptr);
ASSERT_NE(const_dx.data(), nullptr);
ASSERT_NE(unmanaged_dx.data(), nullptr);
ASSERT_NE(unmanaged_from_ptr_dx.data(), nullptr);
ASSERT_NE(dy.data(), nullptr);
ASSERT_NE(dx, dy);
ASSERT_EQ(dx.extent(0), unsigned(N0));
@ -1317,17 +1322,17 @@ class TestDynViewAPI {
ASSERT_NE(dx, dz);
dx = dView0();
ASSERT_TRUE(dx.data() == nullptr);
ASSERT_FALSE(dy.data() == nullptr);
ASSERT_FALSE(dz.data() == nullptr);
ASSERT_EQ(dx.data(), nullptr);
ASSERT_NE(dy.data(), nullptr);
ASSERT_NE(dz.data(), nullptr);
dy = dView0();
ASSERT_TRUE(dx.data() == nullptr);
ASSERT_TRUE(dy.data() == nullptr);
ASSERT_FALSE(dz.data() == nullptr);
ASSERT_EQ(dx.data(), nullptr);
ASSERT_EQ(dy.data(), nullptr);
ASSERT_NE(dz.data(), nullptr);
dz = dView0();
ASSERT_TRUE(dx.data() == nullptr);
ASSERT_TRUE(dy.data() == nullptr);
ASSERT_TRUE(dz.data() == nullptr);
ASSERT_EQ(dx.data(), nullptr);
ASSERT_EQ(dy.data(), nullptr);
ASSERT_EQ(dz.data(), nullptr);
// View - DynRankView Interoperability tests
// deep_copy from view to dynrankview
@ -1367,7 +1372,7 @@ class TestDynViewAPI {
// Test helper: asserts that a DynRankView of const DataType (arg_const) and
// a DynRankView of DataType (arg) compare equal.
// NOTE(review): presumably both arguments are built from the same
// non-const view at the call site, relying on its implicit conversion to
// the const-valued type - confirm against callers.
// NOTE(review): ASSERT_TRUE(a == b) and ASSERT_EQ(a, b) check the same
// condition; they look like the before/after lines of one diff hunk, so
// confirm only one is intended to remain.
static void check_auto_conversion_to_const(
const Kokkos::DynRankView<const DataType, device>& arg_const,
const Kokkos::DynRankView<DataType, device>& arg) {
ASSERT_TRUE(arg_const == arg);
ASSERT_EQ(arg_const, arg);
}
static void run_test_allocated() {
@ -1396,8 +1401,8 @@ class TestDynViewAPI {
const_typeX xc = x;
const_typeR xr = x;
ASSERT_TRUE(xc == x);
ASSERT_TRUE(x == xc);
ASSERT_EQ(xc, x);
ASSERT_EQ(x, xc);
// For CUDA the constant random access View does not return
// an lvalue reference due to retrieving through texture cache
@ -1406,7 +1411,7 @@ class TestDynViewAPI {
if (!std::is_same<typename device::execution_space, Kokkos::Cuda>::value)
#endif
{
ASSERT_TRUE(x.data() == xr.data());
ASSERT_EQ(x.data(), xr.data());
}
// typeX xf = xc ; // setting non-const from const must not compile
@ -1659,29 +1664,29 @@ class TestDynViewAPI {
const_svector_right_type cvr3 =
Kokkos::subdynrankview(mv, Kokkos::ALL(), 2);
ASSERT_TRUE(&v1[0] == &v1(0));
ASSERT_TRUE(&v1[0] == &mv(0, 0));
ASSERT_TRUE(&v2[0] == &mv(0, 1));
ASSERT_TRUE(&v3[0] == &mv(0, 2));
ASSERT_EQ(&v1[0], &v1(0));
ASSERT_EQ(&v1[0], &mv(0, 0));
ASSERT_EQ(&v2[0], &mv(0, 1));
ASSERT_EQ(&v3[0], &mv(0, 2));
ASSERT_TRUE(&cv1[0] == &mv(0, 0));
ASSERT_TRUE(&cv2[0] == &mv(0, 1));
ASSERT_TRUE(&cv3[0] == &mv(0, 2));
ASSERT_EQ(&cv1[0], &mv(0, 0));
ASSERT_EQ(&cv2[0], &mv(0, 1));
ASSERT_EQ(&cv3[0], &mv(0, 2));
ASSERT_TRUE(&vr1[0] == &mv(0, 0));
ASSERT_TRUE(&vr2[0] == &mv(0, 1));
ASSERT_TRUE(&vr3[0] == &mv(0, 2));
ASSERT_EQ(&vr1[0], &mv(0, 0));
ASSERT_EQ(&vr2[0], &mv(0, 1));
ASSERT_EQ(&vr3[0], &mv(0, 2));
ASSERT_TRUE(&cvr1[0] == &mv(0, 0));
ASSERT_TRUE(&cvr2[0] == &mv(0, 1));
ASSERT_TRUE(&cvr3[0] == &mv(0, 2));
ASSERT_EQ(&cvr1[0], &mv(0, 0));
ASSERT_EQ(&cvr2[0], &mv(0, 1));
ASSERT_EQ(&cvr3[0], &mv(0, 2));
ASSERT_TRUE(&mv1(0, 0) == &mv(1, 2));
ASSERT_TRUE(&mv1(1, 1) == &mv(2, 3));
ASSERT_TRUE(&mv1(3, 2) == &mv(4, 4));
ASSERT_TRUE(&mvr1(0, 0) == &mv_right(1, 2));
ASSERT_TRUE(&mvr1(1, 1) == &mv_right(2, 3));
ASSERT_TRUE(&mvr1(3, 2) == &mv_right(4, 4));
ASSERT_EQ(&mv1(0, 0), &mv(1, 2));
ASSERT_EQ(&mv1(1, 1), &mv(2, 3));
ASSERT_EQ(&mv1(3, 2), &mv(4, 4));
ASSERT_EQ(&mvr1(0, 0), &mv_right(1, 2));
ASSERT_EQ(&mvr1(1, 1), &mv_right(2, 3));
ASSERT_EQ(&mvr1(3, 2), &mv_right(4, 4));
const_svector_type c_cv1(v1);
typename svector_type::const_type c_cv2(v2);

View File

@ -52,7 +52,7 @@
#include <Kokkos_Core.hpp>
#include <Kokkos_DynamicView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
namespace Test {

View File

@ -50,7 +50,7 @@
#include <iostream>
#include <cstdlib>
#include <cstdio>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Timer.hpp>
#include <Kokkos_OffsetView.hpp>
#include <KokkosExp_MDRangePolicy.hpp>

Some files were not shown because too many files have changed in this diff Show More