Merge branch 'develop' into next_release

This commit is contained in:
Axel Kohlmeyer
2025-04-02 14:11:45 -04:00
467 changed files with 15726 additions and 9597 deletions

1
.github/CODEOWNERS vendored
View File

@ -71,6 +71,7 @@ src/EXTRA-COMMAND/group_ndx.* @akohlmey
src/EXTRA-COMMAND/ndx_group.* @akohlmey src/EXTRA-COMMAND/ndx_group.* @akohlmey
src/EXTRA-COMPUTE/compute_stress_mop*.* @RomainVermorel src/EXTRA-COMPUTE/compute_stress_mop*.* @RomainVermorel
src/EXTRA-COMPUTE/compute_born_matrix.* @Bibobu @athomps src/EXTRA-COMPUTE/compute_born_matrix.* @Bibobu @athomps
src/EXTRA-DUMP/dump_extxyz.* @fxcoudert
src/EXTRA-FIX/fix_deform_pressure.* @jtclemm src/EXTRA-FIX/fix_deform_pressure.* @jtclemm
src/EXTRA-PAIR/pair_dispersion_d3.* @soniasolomoni @arthurfl src/EXTRA-PAIR/pair_dispersion_d3.* @soniasolomoni @arthurfl
src/EXTRA-PAIR/d3_parameters.h @soniasolomoni @arthurfl src/EXTRA-PAIR/d3_parameters.h @soniasolomoni @arthurfl

View File

@ -57,8 +57,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject) include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.5.01.tar.gz" CACHE STRING "URL for KOKKOS tarball") set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.6.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "4d832aa0284169d9e3fbae3165286bc6" CACHE STRING "MD5 checksum of KOKKOS tarball") set(KOKKOS_MD5 "61b2b69ae50d83eedcc7d47a3fa3d6cb" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL) mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5) mark_as_advanced(KOKKOS_MD5)
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK) GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
@ -83,7 +83,7 @@ if(DOWNLOAD_KOKKOS)
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build) add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build) add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
elseif(EXTERNAL_KOKKOS) elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 4.5.01 REQUIRED CONFIG) find_package(Kokkos 4.6.00 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos) target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else() else()
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)

View File

@ -611,6 +611,9 @@ They must be specified in uppercase.
* - ZEN3 * - ZEN3
- HOST - HOST
- AMD Zen3 architecture - AMD Zen3 architecture
* - ZEN4
- HOST
- AMD Zen4 architecture
* - RISCV_SG2042 * - RISCV_SG2042
- HOST - HOST
- SG2042 (RISC-V) CPUs - SG2042 (RISC-V) CPUs
@ -714,7 +717,7 @@ They must be specified in uppercase.
- GPU - GPU
- Intel GPU Ponte Vecchio - Intel GPU Ponte Vecchio
This list was last updated for version 4.5.1 of the Kokkos library. This list was last updated for version 4.6.0 of the Kokkos library.
.. tabs:: .. tabs::

View File

@ -128,7 +128,7 @@ OPT.
* :doc:`harmonic (iko) <dihedral_harmonic>` * :doc:`harmonic (iko) <dihedral_harmonic>`
* :doc:`helix (o) <dihedral_helix>` * :doc:`helix (o) <dihedral_helix>`
* :doc:`lepton (o) <dihedral_lepton>` * :doc:`lepton (o) <dihedral_lepton>`
* :doc:`multi/harmonic (o) <dihedral_multi_harmonic>` * :doc:`multi/harmonic (ko) <dihedral_multi_harmonic>`
* :doc:`nharmonic (o) <dihedral_nharmonic>` * :doc:`nharmonic (o) <dihedral_nharmonic>`
* :doc:`opls (iko) <dihedral_opls>` * :doc:`opls (iko) <dihedral_opls>`
* :doc:`quadratic (o) <dihedral_quadratic>` * :doc:`quadratic (o) <dihedral_quadratic>`

View File

@ -19,6 +19,7 @@ An alphabetic list of all LAMMPS :doc:`dump <dump>` commands.
* :doc:`custom/gz <dump>` * :doc:`custom/gz <dump>`
* :doc:`custom/zstd <dump>` * :doc:`custom/zstd <dump>`
* :doc:`dcd <dump>` * :doc:`dcd <dump>`
* :doc:`extxyz <dump>`
* :doc:`grid <dump>` * :doc:`grid <dump>`
* :doc:`grid/vtk <dump>` * :doc:`grid/vtk <dump>`
* :doc:`h5md <dump_h5md>` * :doc:`h5md <dump_h5md>`

View File

@ -186,6 +186,7 @@ OPT.
* :doc:`qeq/fire <fix_qeq>` * :doc:`qeq/fire <fix_qeq>`
* :doc:`qeq/point <fix_qeq>` * :doc:`qeq/point <fix_qeq>`
* :doc:`qeq/reaxff (ko) <fix_qeq_reaxff>` * :doc:`qeq/reaxff (ko) <fix_qeq_reaxff>`
* :doc:`qeq/rel/reaxff <fix_qeq_rel_reaxff>`
* :doc:`qeq/shielded <fix_qeq>` * :doc:`qeq/shielded <fix_qeq>`
* :doc:`qeq/slater <fix_qeq>` * :doc:`qeq/slater <fix_qeq>`
* :doc:`qmmm <fix_qmmm>` * :doc:`qmmm <fix_qmmm>`

View File

@ -4,7 +4,7 @@
dihedral_style multi/harmonic command dihedral_style multi/harmonic command
===================================== =====================================
Accelerator Variants: *multi/harmonic/omp* Accelerator Variants: *multi/harmonic/kk*, *multi/harmonic/omp*
Syntax Syntax
"""""" """"""

View File

@ -3,6 +3,7 @@
.. index:: dump cfg .. index:: dump cfg
.. index:: dump custom .. index:: dump custom
.. index:: dump dcd .. index:: dump dcd
.. index:: dump extxyz
.. index:: dump grid .. index:: dump grid
.. index:: dump grid/vtk .. index:: dump grid/vtk
.. index:: dump local .. index:: dump local
@ -59,7 +60,7 @@ Syntax
* ID = user-assigned name for the dump * ID = user-assigned name for the dump
* group-ID = ID of the group of atoms to be dumped * group-ID = ID of the group of atoms to be dumped
* style = *atom* or *atom/adios* or *atom/gz* or *atom/zstd* or *cfg* or *cfg/gz* or *cfg/zstd* or *cfg/uef* or *custom* or *custom/gz* or *custom/zstd* or *custom/adios* or *dcd* or *grid* or *grid/vtk* or *h5md* or *image* or *local* or *local/gz* or *local/zstd* or *molfile* or *movie* or *netcdf* or *netcdf/mpiio* or *vtk* or *xtc* or *xyz* or *xyz/gz* or *xyz/zstd* or *yaml* * style = *atom* or *atom/adios* or *atom/gz* or *atom/zstd* or *cfg* or *cfg/gz* or *cfg/zstd* or *cfg/uef* or *custom* or *custom/gz* or *custom/zstd* or *custom/adios* or *dcd* or *extxyz* or *grid* or *grid/vtk* or *h5md* or *image* or *local* or *local/gz* or *local/zstd* or *molfile* or *movie* or *netcdf* or *netcdf/mpiio* or *vtk* or *xtc* or *xyz* or *xyz/gz* or *xyz/zstd* or *yaml*
* N = dump on timesteps which are multiples of N * N = dump on timesteps which are multiples of N
* file = name of file to write dump info to * file = name of file to write dump info to
* attribute1,attribute2,... = list of attributes for a particular style * attribute1,attribute2,... = list of attributes for a particular style
@ -77,6 +78,7 @@ Syntax
*custom*, *custom/gz*, *custom/zstd* attributes = see below *custom*, *custom/gz*, *custom/zstd* attributes = see below
*custom/adios* attributes = same as *custom* attributes, discussed on :doc:`dump custom/adios <dump_adios>` page *custom/adios* attributes = same as *custom* attributes, discussed on :doc:`dump custom/adios <dump_adios>` page
*dcd* attributes = none *dcd* attributes = none
*extxyz* attributes = none
*h5md* attributes = discussed on :doc:`dump h5md <dump_h5md>` page *h5md* attributes = discussed on :doc:`dump h5md <dump_h5md>` page
*grid* attributes = see below *grid* attributes = see below
*grid/vtk* attributes = see below *grid/vtk* attributes = see below
@ -242,28 +244,29 @@ all the processors or multiple smaller files.
frames consistently to the same atom. This can lead to incorrect frames consistently to the same atom. This can lead to incorrect
visualizations or results. LAMMPS will print a warning in such cases. visualizations or results. LAMMPS will print a warning in such cases.
For the *atom*, *custom*, *cfg*, *grid*, and *local* styles, sorting For the *atom*, *custom*, *cfg*, *grid*, and *local* styles, sorting is
is off by default. For the *dcd*, *grid/vtk*, *xtc*, *xyz*, and off by default. For the *dcd*, *extxyz*, *grid/vtk*, *xtc*, *xyz*, and
*molfile* styles, sorting by atom ID or grid ID is on by default. See *molfile* styles, sorting by atom ID or grid ID is on by default. See
the :doc:`dump_modify <dump_modify>` page for details. the :doc:`dump_modify <dump_modify>` page for details.
The *style* keyword determines what kind of data is written to the The *style* keyword determines what kind of data is written to the
dump file(s) and in what format. dump file(s) and in what format.
Note that *atom*, *custom*, *dcd*, *xtc*, *xyz*, and *yaml* style dump Note that *atom*, *custom*, *dcd*, *extxyz*, *xtc*, *xyz*, and *yaml*
files can be read directly by `VMD <https://www.ks.uiuc.edu/Research/vmd>`_, style dump files can be read directly by `VMD
a popular tool for visualizing and analyzing trajectories from atomic <https://www.ks.uiuc.edu/Research/vmd>`_, a popular tool for visualizing
and molecular systems. For reading *netcdf* style dump files, the and analyzing trajectories from atomic and molecular systems. For
netcdf plugin needs to be recompiled from source using a NetCDF version reading *netcdf* style dump files, the netcdf plugin needs to be
compatible with the one used by LAMMPS. The bundled plugin binary recompiled from source using a NetCDF version compatible with the one
uses a very old version of NetCDF that is not compatible with LAMMPS. used by LAMMPS. The bundled plugin binary uses a very old version of
NetCDF that is not compatible with LAMMPS.
Likewise the `OVITO visualization package <https://www.ovito.org>`_, Likewise the `OVITO visualization package <https://www.ovito.org>`_,
popular for materials modeling, can read the *atom*, *custom*, popular for materials modeling, can read the *atom*, *custom*, *extxyz*,
*local*, *xtc*, *cfg*, *netcdf*, and *xyz* style atom dump files *local*, *xtc*, *cfg*, *netcdf*, and *xyz* style atom dump files
directly. With version 3.8 and above, OVITO can also read and directly. With version 3.8 and above, OVITO can also read and visualize
visualize *grid* style dump files with grid cell data, including *grid* style dump files with grid cell data, including iso-surface
iso-surface images of the grid cell values. images of the grid cell values.
Note that settings made via the :doc:`dump_modify <dump_modify>` Note that settings made via the :doc:`dump_modify <dump_modify>`
command can also alter the format of individual values and content of command can also alter the format of individual values and content of
@ -475,6 +478,24 @@ label). This option will help many visualization programs to guess bonds
and colors. You can use the :doc:`dump_modify types labels <dump_modify>` and colors. You can use the :doc:`dump_modify types labels <dump_modify>`
option to replace numeric atom types with :doc:`type labels <Howto_type_labels>`. option to replace numeric atom types with :doc:`type labels <Howto_type_labels>`.
.. versionadded:: TBD
The *extxyz* style writes XYZ files compatible with the Extended XYZ (or
ExtXYZ) format as defined in `the libAtoms specification
<https://github.com/libAtoms/extxyz>`_. Specifically, the following
information will be dumped:
* timestep
* time, which can be disabled with :doc:`dump_modify time no <dump_modify>`
* simulation box lattice and periodic boundary conditions (pbc)
* atomic forces, which can be disabled with :doc:`dump_modify forces no <dump_modify>`
* atomic velocities, which can be disabled with :doc:`dump_modify vel no <dump_modify>`
* atomic masses, if enabled with :doc:`dump_modify mass yes <dump_modify>`
Dump style *extxyz* requires either that a :doc:`type label map for atom types
<labelmap>` is defined or :doc:`dump_modify element <dump_modify>` is used to
set up an atom type number to atom name mapping.
.. versionadded:: 22Dec2022 .. versionadded:: 22Dec2022
The *grid/vtk* style writes VTK files for grid data on a regular The *grid/vtk* style writes VTK files for grid data on a regular
@ -607,8 +628,8 @@ with the processor ID from :math:`0` to :math:`P-1`. For example,
tmp.dump.% becomes tmp.dump.0, tmp.dump.1, ... tmp.dump.:math:`P-1`, tmp.dump.% becomes tmp.dump.0, tmp.dump.1, ... tmp.dump.:math:`P-1`,
etc. This creates smaller files and can be a fast mode of output on etc. This creates smaller files and can be a fast mode of output on
parallel machines that support parallel I/O for output. This option is parallel machines that support parallel I/O for output. This option is
**not** available for the *dcd*, *xtc*, *xyz*, *grid/vtk*, and *yaml* **not** available for the *dcd*, *extxyz*, *xtc*, *xyz*, *grid/vtk*, and
styles. *yaml* styles.
By default, :math:`P` is the number of processors, meaning one file per By default, :math:`P` is the number of processors, meaning one file per
processor, but :math:`P` can be set to a smaller value via the *nfile* or processor, but :math:`P` can be set to a smaller value via the *nfile* or
@ -1017,9 +1038,9 @@ the COMPRESS package. They are only enabled if LAMMPS was built with
that package. See the :doc:`Build package <Build_package>` page for that package. See the :doc:`Build package <Build_package>` page for
more info. more info.
The *xtc*, *dcd*, and *yaml* styles are part of the EXTRA-DUMP package. The *dcd*, *extxyz*, *xtc*, and *yaml* styles are part of the EXTRA-DUMP
They are only enabled if LAMMPS was built with that package. See the package. They are only enabled if LAMMPS was built with that package.
:doc:`Build package <Build_package>` page for more info. See the :doc:`Build package <Build_package>` page for more info.
Related commands Related commands
"""""""""""""""" """"""""""""""""

View File

@ -92,6 +92,15 @@ Syntax
see the :doc:`dump image <dump_image>` doc page for details see the :doc:`dump image <dump_image>` doc page for details
* these keywords apply only to the extxyz dump style
* keyword = *forces* or *mass* or *vel*
.. parsed-literal::
*forces* arg = *yes* or *no*
*mass* arg = *yes* or *no*
*vel* arg = *yes* or *no*
* these keywords apply only to the */gz* and */zstd* dump styles * these keywords apply only to the */gz* and */zstd* dump styles
* keyword = *compression_level* * keyword = *compression_level*
@ -972,9 +981,11 @@ The option defaults are
* fileper = # of processors * fileper = # of processors
* first = no * first = no
* flush = yes * flush = yes
* forces = yes
* format = %d and %g for each integer or floating point value * format = %d and %g for each integer or floating point value
* image = no * image = no
* label = ENTRIES * label = ENTRIES
* mass = no
* maxfiles = -1 * maxfiles = -1
* nfile = 1 * nfile = 1
* pad = 0 * pad = 0
@ -990,6 +1001,7 @@ The option defaults are
* types = numeric * types = numeric
* units = no * units = no
* unwrap = no * unwrap = no
* vel = yes
* compression_level = 9 (gz variants) * compression_level = 9 (gz variants)
* compression_level = 0 (zstd variants) * compression_level = 0 (zstd variants)

View File

@ -365,6 +365,7 @@ accelerated styles exist.
* :doc:`qeq/fire <fix_qeq>` - charge equilibration via FIRE minimizer * :doc:`qeq/fire <fix_qeq>` - charge equilibration via FIRE minimizer
* :doc:`qeq/point <fix_qeq>` - charge equilibration via point method * :doc:`qeq/point <fix_qeq>` - charge equilibration via point method
* :doc:`qeq/reaxff <fix_qeq_reaxff>` - charge equilibration for ReaxFF potential * :doc:`qeq/reaxff <fix_qeq_reaxff>` - charge equilibration for ReaxFF potential
* :doc:`qeq/rel/reaxff <fix_qeq_rel_reaxff>` - charge equilibration for ReaxFF potential with alternate efield implementation
* :doc:`qeq/shielded <fix_qeq>` - charge equilibration via shielded method * :doc:`qeq/shielded <fix_qeq>` - charge equilibration via shielded method
* :doc:`qeq/slater <fix_qeq>` - charge equilibration via Slater method * :doc:`qeq/slater <fix_qeq>` - charge equilibration via Slater method
* :doc:`qmmm <fix_qmmm>` - functionality to enable a quantum mechanics/molecular mechanics coupling * :doc:`qmmm <fix_qmmm>` - functionality to enable a quantum mechanics/molecular mechanics coupling

View File

@ -123,8 +123,10 @@ components in non-periodic directions.
Related commands Related commands
"""""""""""""""" """"""""""""""""
:doc:`pair_style reaxff <pair_reaxff>`, :doc:`fix qeq/reaxff <fix_qeq_reaxff>`, :doc:`pair_style reaxff <pair_reaxff>`,
:doc:`fix qtpi/reaxff <fix_qtpie_reaxff>` :doc:`fix qeq/reaxff <fix_qeq_reaxff>`,
:doc:`fix qtpie/reaxff <fix_qtpie_reaxff>`,
:doc:`fix qeq/rel/reaxff <fix_qeq_rel_reaxff>`
Default Default
""""""" """""""

View File

@ -14,7 +14,7 @@ Syntax
* adapt = style name of this fix command * adapt = style name of this fix command
* N = adapt simulation settings every this many timesteps * N = adapt simulation settings every this many timesteps
* one or more attribute/arg pairs may be appended * one or more attribute/arg pairs may be appended
* attribute = *pair* or *bond* or *angle* or *kspace* or *atom* * attribute = *pair* or *bond* or *angle* or *improper* or *kspace* or *atom*
.. parsed-literal:: .. parsed-literal::
@ -33,6 +33,11 @@ Syntax
aparam = parameter to adapt over time aparam = parameter to adapt over time
I = type angle to set parameter for (integer or type label) I = type angle to set parameter for (integer or type label)
v_name = variable with name that calculates value of aparam v_name = variable with name that calculates value of aparam
*improper* args = istyle iparam I v_name
istyle = improper style name (e.g., cvff)
iparam = parameter to adapt over time
I = type improper to set parameter for (integer or type label)
v_name = variable with name that calculates value of iparam
*kspace* arg = v_name *kspace* arg = v_name
v_name = variable with name that calculates scale factor on :math:`k`-space terms v_name = variable with name that calculates scale factor on :math:`k`-space terms
*atom* args = atomparam v_name *atom* args = atomparam v_name
@ -428,6 +433,56 @@ this fix uses to reset theta0 needs to generate values in radians.
---------- ----------
.. versionadded:: TBD
The *improper* keyword uses the specified variable to change the value of
an improper coefficient over time, very similar to how the *angle* keyword
operates. The only difference is that now an improper coefficient for a
given improper type is adapted.
A wild-card asterisk can be used in place of or in conjunction with the
improper type argument to set the coefficients for multiple improper types.
This takes the form "\*" or "\*n" or "m\*" or "m\*n". If :math:`N` is
the number of improper types, then an asterisk with no numeric values means
all types from 1 to :math:`N`. A leading asterisk means all types from
1 to n (inclusive). A trailing asterisk means all types from m to
:math:`N` (inclusive). A middle asterisk means all types from m to n
(inclusive).
If :doc:`improper_style hybrid <improper_hybrid>` is used, *istyle* should be a
sub-style name. The improper styles that currently work with fix adapt are:
+---------------------------------------------------------+----------------+----------------+
| :doc:`amoeba <improper_amoeba>` | k | type impropers |
+---------------------------------------------------------+----------------+----------------+
| :doc:`class2 <improper_class2>` | k,chi0 | type impropers |
+---------------------------------------------------------+----------------+----------------+
| :doc:`cossq <improper_cossq>` | k,chi0 | type impropers |
+---------------------------------------------------------+----------------+----------------+
| :doc:`cvff <improper_cvff>` | k,d,n | type impropers |
+---------------------------------------------------------+----------------+----------------+
| :doc:`distance <improper_distance>` | k2,k4 | type impropers |
+---------------------------------------------------------+----------------+----------------+
| :doc:`distharm <improper_distharm>` | k,d0 | type impropers |
+---------------------------------------------------------+----------------+----------------+
| :doc:`fourier <improper_fourier>` | k,C0,C1,C2 | type impropers |
+---------------------------------------------------------+----------------+----------------+
| :doc:`harmonic <improper_harmonic>` | k,chi0 | type impropers |
+---------------------------------------------------------+----------------+----------------+
| :doc:`inversion/harmonic <improper_inversion_harmonic>` | k,w0 | type impropers |
+---------------------------------------------------------+----------------+----------------+
| :doc:`ring <improper_ring>` | k,theta0 | type impropers |
+---------------------------------------------------------+----------------+----------------+
| :doc:`umbrella <improper_umbrella>` | k,w0 | type impropers |
+---------------------------------------------------------+----------------+----------------+
| :doc:`sqdistharm <improper_sqdistharm>` | k | type impropers |
+---------------------------------------------------------+----------------+----------------+
Note that internally, chi0 and theta0 are stored in radians, so the variable
this fix uses to reset chi0 or theta0 needs to generate values in radians.
----------
The *kspace* keyword uses the specified variable as a scale factor on The *kspace* keyword uses the specified variable as a scale factor on
the energy, forces, virial calculated by whatever :math:`k`-space solver is the energy, forces, virial calculated by whatever :math:`k`-space solver is
defined by the :doc:`kspace_style <kspace_style>` command. If the defined by the :doc:`kspace_style <kspace_style>` command. If the

View File

@ -59,7 +59,7 @@ extracted from the :doc:`pair_style reaxff <pair_reaxff>` command and
the ReaxFF force field file it reads in. If a file name is specified the ReaxFF force field file it reads in. If a file name is specified
for *params*, then the parameters are taken from the specified file for *params*, then the parameters are taken from the specified file
and the file must contain one line for each atom type. The latter and the file must contain one line for each atom type. The latter
form must be used when performing QeQ with a non-ReaxFF potential. form must be used when performing QEq with a non-ReaxFF potential.
Each line should be formatted as follows: Each line should be formatted as follows:
.. parsed-literal:: .. parsed-literal::
@ -140,7 +140,8 @@ Related commands
"""""""""""""""" """"""""""""""""
:doc:`pair_style reaxff <pair_reaxff>`, :doc:`fix qeq/shielded <fix_qeq>`, :doc:`pair_style reaxff <pair_reaxff>`, :doc:`fix qeq/shielded <fix_qeq>`,
:doc:`fix acks2/reaxff <fix_acks2_reaxff>`, :doc:`fix qtpie/reaxff <fix_qtpie_reaxff>` :doc:`fix acks2/reaxff <fix_acks2_reaxff>`, :doc:`fix qtpie/reaxff <fix_qtpie_reaxff>`,
:doc:`fix qeq/rel/reaxff <fix_qeq_rel_reaxff>`
Default Default
""""""" """""""

View File

@ -0,0 +1,195 @@
.. index:: fix qeq/rel/reaxff
fix qeq/rel/reaxff command
==========================
Syntax
""""""
.. code-block:: LAMMPS
fix ID group-ID qeq/rel/reaxff Nevery cutlo cuthi tolerance params gfile args
* ID, group-ID are documented in :doc:`fix <fix>` command
* qeq/rel/reaxff = style name of this fix command
* Nevery = perform QEqR every this many steps
* cutlo,cuthi = lo and hi cutoff for Taper radius
* tolerance = precision to which charges will be equilibrated
* params = reaxff or a filename
* gfile = the name of a file containing Gaussian orbital exponents
* one or more keywords or keyword/value pairs may be appended
.. parsed-literal::
keyword = *scale* or *maxiter* or *nowarn*
*scale* beta = set value of scaling factor *beta* (determines strength of electric polarization)
*maxiter* N = limit the number of iterations to *N*
*nowarn* = do not print a warning message if the maximum number of iterations is reached
Examples
""""""""
.. code-block:: LAMMPS
fix 1 all qeq/rel/reaxff 1 0.0 10.0 1.0e-6 reaxff exp.qeqr
fix 1 all qeq/rel/reaxff 1 0.0 10.0 1.0e-6 params.qeqr exp.qeqr scale 1.5 maxiter 500 nowarn
Description
"""""""""""
.. versionadded:: 19Nov2024
This fix implements the QEqR method for charge equilibration, which
differs from the QEq charge equilibration method :ref:`(Rappe and
Goddard) <Rappe4>` only in how external electric fields are accounted
for. This fix therefore raises a warning when used without :doc:`fix
efield <fix_efield>` since :doc:`fix qeq/reaxff <fix_qeq_reaxff>` should
be used without an external electric field. Charges are computed with
the QEqR method by minimizing the electrostatic energy of the system in
the same way as the QEq method but where the absolute electronegativity,
:math:`\chi_i`, of each atom in the QEq method is replaced with an
effective electronegativity given by
.. math::
\chi_{\mathrm{r}i} = \chi_i + \frac{\sum_{j=1}^{N} \beta(\phi_i - \phi_j) S_{ij}}
{\sum_{m=1}^{N}S_{im}},
where :math:`N` is the number of atoms in the system, :math:`\beta` is a
scaling factor, :math:`\phi_i` and :math:`\phi_j` are the electric
potentials at the positions of atoms :math:`i` and :math:`j` due to the
external electric field and :math:`S_{ij}` is the overlap integral
between atoms :math:`i` and :math:`j`. This formulation is advantageous
over the method used by :doc:`fix qeq/reaxff <fix_qeq_reaxff>` to
account for an external electric field in that it permits periodic
boundaries in the direction of an external electric field and in that it
does not worsen long-range charge transfer seen with QEq.
This fix is typically used in conjunction with the ReaxFF force field
model as implemented in the :doc:`pair_style reaxff <pair_reaxff>`
command, but it can be used with any potential in LAMMPS, so long as it
defines and uses charges on each atom. For more technical details about
the charge equilibration performed by *fix qeq/rel/reaxff*, which is the
same as in :doc:`fix qeq/reaxff <fix_qeq_reaxff>` except for the use of
:math:`\chi_{\mathrm{r}i}`, please refer to :ref:`(Aktulga)
<qeq-Aktulga3>`. To be explicit, *fix qeq/rel/reaxff* replaces
:math:`\chi_k` of eq. 3 in :ref:`(Aktulga) <qeq-Aktulga3>` with
:math:`\chi_{\mathrm{r}k}` when an external electric field is applied.
This fix requires the absolute electronegativity, :math:`\chi`, in eV,
the self-Coulomb potential, :math:`\eta`, in eV, and the shielded
Coulomb constant, :math:`\gamma`, in :math:`\AA^{-1}`. If the *params*
setting above is the word "reaxff", then these are extracted from the
:doc:`pair_style reaxff <pair_reaxff>` command and the ReaxFF force
field file it reads in. If a file name is specified for *params*, then
the parameters are taken from the specified file and the file must
contain one line for each atom type. The latter form must be used when
using this fix with a non-ReaxFF potential. Each line should be
formatted as follows, ensuring that the parameters are given in units of
eV, eV, and :math:`\AA^{-1}`, respectively:
.. parsed-literal::
itype chi eta gamma
where *itype* is the atom type from 1 to Ntypes. Note that eta is
defined here as twice the eta value in the ReaxFF file.
The overlap integrals :math:`S_{ij}` are computed by using normalized 1s
Gaussian type orbitals. The Gaussian orbital exponents, :math:`\alpha`,
that are needed to compute the overlap integrals are taken from the file
given by *gfile*. This file must contain one line for each atom type
and provide the Gaussian orbital exponent for each atom type in units of
inverse square Bohr radius. Each line should be formatted as follows:
.. parsed-literal::
itype alpha
Empty lines or any text following the pound sign (#) are ignored. An
example *gfile* for a system with two atom types is
.. parsed-literal::
# An example gfile. Exponents are taken from Table 2.2 of Chen, J. (2009).
# Theory and applications of fluctuating-charge models.
# The units of the exponents are 1 / (Bohr radius)^2 .
1 0.2240 # O
2 0.5434 # H
The optional *scale* keyword sets the value of :math:`\beta` in the
equation for :math:`\chi_{\mathrm{r}i}`. The default value is 1.0.
The optional *maxiter* keyword allows changing the max number of
iterations in the linear solver. The default value is 200.
The optional *nowarn* keyword silences the warning message printed when
the maximum number of iterations is reached. This can be useful for
comparing serial and parallel results where having the same fixed number
of iterations is desired, which can be achieved by using a very small
tolerance and setting *maxiter* to the desired number of iterations.
.. note::
In order to solve the self-consistent equations for electronegativity
equalization, LAMMPS imposes the additional constraint that all the
charges in the fix group must add up to zero. The initial charge
assignments should also satisfy this constraint. LAMMPS will print a
warning if that is not the case.
Restart, fix_modify, output, run start/stop, minimize info
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
No information about this fix is written to :doc:`binary restart files
<restart>`. This fix computes a global scalar (the number of
iterations) and a per-atom vector (the effective electronegativity),
which can be accessed by various :doc:`output commands <Howto_output>`.
No parameter of this fix can be used with the *start/stop* keywords of
the :doc:`run <run>` command.
This fix is invoked during :doc:`energy minimization <minimize>`.
Restrictions
""""""""""""
This fix is part of the REAXFF package. It is only enabled if LAMMPS
was built with that package. See the :doc:`Build package
<Build_package>` page for more info.
This fix does not correctly handle interactions involving multiple
periodic images of the same atom. Hence, it should not be used for
periodic cell dimensions smaller than the non-bonded cutoff radius,
which is typically :math:`10~\AA` for ReaxFF simulations.
This fix may be used in combination with :doc:`fix efield <fix_efield>`
and will apply the external electric field during charge equilibration,
but there may be only one fix efield instance used and the electric
field must be applied to all atoms in the system. Consequently, `fix
efield` must be used with *group-ID* all and must not be used with the
keyword *region*. Equal-style variables can be used for electric field
vector components without any further settings. Atom-style variables can
be used for spatially-varying electric field vector components, but the
resulting electric potential must be specified as an atom-style variable
using the *potential* keyword for `fix efield`.
Related commands
""""""""""""""""
:doc:`pair_style reaxff <pair_reaxff>`, :doc:`fix qeq/reaxff <fix_qeq_reaxff>`,
:doc:`fix acks2/reaxff <fix_acks2_reaxff>`, :doc:`fix qtpie/reaxff <fix_qtpie_reaxff>`
Default
"""""""
scale = 1.0 and maxiter = 200
----------
.. _Rappe4:
**(Rappe)** Rappe and Goddard III, Journal of Physical Chemistry, 95,
3358-3363 (1991).
.. _qeq-Aktulga3:
**(Aktulga)** Aktulga, Fogarty, Pandit, Grama, Parallel Computing, 38,
245-259 (2012).

View File

@ -21,8 +21,10 @@ Syntax
.. parsed-literal:: .. parsed-literal::
keyword = *maxiter* keyword = *scale* or *maxiter* or *nowarn*
*scale* beta = set value of scaling factor *beta* (determines strength of electric polarization)
*maxiter* N = limit the number of iterations to *N* *maxiter* N = limit the number of iterations to *N*
*nowarn* = do not print a warning message if the maximum number of iterations is reached
Examples Examples
"""""""" """"""""
@ -30,7 +32,7 @@ Examples
.. code-block:: LAMMPS .. code-block:: LAMMPS
fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff exp.qtpie fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff exp.qtpie
fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 params.qtpie exp.qtpie maxiter 500 fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 params.qtpie exp.qtpie scale 1.5 maxiter 500 nowarn
Description Description
""""""""""" """""""""""
@ -46,7 +48,7 @@ same way as the QEq method but where the absolute electronegativity,
electronegativity given by :ref:`(Chen) <qtpie-Chen>` electronegativity given by :ref:`(Chen) <qtpie-Chen>`
.. math:: .. math::
\chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j) S_{ij}} \tilde{\chi}_{i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j) S_{ij}}
{\sum_{m=1}^{N}S_{im}}, {\sum_{m=1}^{N}S_{im}},
which acts to penalize long-range charge transfer seen with the QEq charge which acts to penalize long-range charge transfer seen with the QEq charge
@ -61,11 +63,11 @@ electric field by using the effective electronegativity given in
:ref:`(Gergs) <Gergs>`: :ref:`(Gergs) <Gergs>`:
.. math:: .. math::
\chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \phi_i - \phi_j) S_{ij}} \tilde{\chi}_{\mathrm{r}i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \beta(\phi_i - \phi_j)) S_{ij}}
{\sum_{m=1}^{N}S_{im}}, {\sum_{m=1}^{N}S_{im}},
where :math:`\phi_i` and :math:`\phi_j` are the electric where :math:`\beta` is a scaling factor and :math:`\phi_i` and :math:`\phi_j`
potentials at the positions of atom :math:`i` and :math:`j` are the electric potentials at the positions of atoms :math:`i` and :math:`j`
due to the external electric field. due to the external electric field.
This fix is typically used in conjunction with the ReaxFF force This fix is typically used in conjunction with the ReaxFF force
@ -74,9 +76,12 @@ command, but it can be used with any potential in LAMMPS, so long as it
defines and uses charges on each atom. For more technical details about the defines and uses charges on each atom. For more technical details about the
charge equilibration performed by `fix qtpie/reaxff`, which is the same as in charge equilibration performed by `fix qtpie/reaxff`, which is the same as in
:doc:`fix qeq/reaxff <fix_qeq_reaxff>` except for the use of :doc:`fix qeq/reaxff <fix_qeq_reaxff>` except for the use of
:math:`\chi_{\mathrm{eff},i}`, please refer to :ref:`(Aktulga) <qeq-Aktulga2>`. :math:`\tilde{\chi}_{i}` or :math:`\tilde{\chi}_{\mathrm{r}i}`,
please refer to :ref:`(Aktulga) <qeq-Aktulga2>`.
To be explicit, this fix replaces :math:`\chi_k` of eq. 3 in To be explicit, this fix replaces :math:`\chi_k` of eq. 3 in
:ref:`(Aktulga) <qeq-Aktulga2>` with :math:`\chi_{\mathrm{eff},k}`. :ref:`(Aktulga) <qeq-Aktulga2>` with :math:`\tilde{\chi}_{k}` when no external
electric field is applied and with :math:`\tilde{\chi}_{\mathrm{r}k}` when an
external electric field is applied.
This fix requires the absolute electronegativity, :math:`\chi`, in eV, the This fix requires the absolute electronegativity, :math:`\chi`, in eV, the
self-Coulomb potential, :math:`\eta`, in eV, and the shielded Coulomb self-Coulomb potential, :math:`\eta`, in eV, and the shielded Coulomb
@ -97,7 +102,7 @@ respectively:
where *itype* is the atom type from 1 to Ntypes. Note that eta is where *itype* is the atom type from 1 to Ntypes. Note that eta is
defined here as twice the eta value in the ReaxFF file. defined here as twice the eta value in the ReaxFF file.
The overlap integrals in the equation for :math:`\chi_{\mathrm{eff},i}` The overlap integrals :math:`S_{ij}`
are computed by using normalized 1s Gaussian type orbitals. The Gaussian are computed by using normalized 1s Gaussian type orbitals. The Gaussian
orbital exponents, :math:`\alpha`, that are needed to compute the overlap orbital exponents, :math:`\alpha`, that are needed to compute the overlap
integrals are taken from the file given by *gfile*. integrals are taken from the file given by *gfile*.
@ -120,9 +125,20 @@ Empty lines or any text following the pound sign (#) are ignored. An example
1 0.2240 # O 1 0.2240 # O
2 0.5434 # H 2 0.5434 # H
The optional *scale* keyword sets the value of :math:`\beta` in the equation for
:math:`\tilde{\chi}_{\mathrm{r}i}`. This keyword only affects the computed charges
when :doc:`fix efield <fix_efield>` is used. The default value is 1.0.
The optional *maxiter* keyword allows changing the max number The optional *maxiter* keyword allows changing the max number
of iterations in the linear solver. The default value is 200. of iterations in the linear solver. The default value is 200.
The optional *nowarn* keyword silences the warning message printed
when the maximum number of iterations is reached. This can be
useful for comparing serial and parallel results where having the
same fixed number of iterations is desired, which can be achieved
by using a very small tolerance and setting *maxiter* to the desired
number of iterations.
.. note:: .. note::
In order to solve the self-consistent equations for electronegativity In order to solve the self-consistent equations for electronegativity
@ -170,12 +186,13 @@ Related commands
"""""""""""""""" """"""""""""""""
:doc:`pair_style reaxff <pair_reaxff>`, :doc:`fix qeq/reaxff <fix_qeq_reaxff>`, :doc:`pair_style reaxff <pair_reaxff>`, :doc:`fix qeq/reaxff <fix_qeq_reaxff>`,
:doc:`fix acks2/reaxff <fix_acks2_reaxff>` :doc:`fix acks2/reaxff <fix_acks2_reaxff>`,
:doc:`fix qeq/rel/reaxff <fix_qeq_rel_reaxff>`
Default Default
""""""" """""""
maxiter 200 scale = 1.0 and maxiter = 200
---------- ----------

View File

@ -725,6 +725,7 @@ dashpot
dat dat
datafile datafile
datatype datatype
dataset
datums datums
Davidchack Davidchack
Daw Daw
@ -3120,9 +3121,11 @@ qE
qeff qeff
qelectron qelectron
qeq qeq
qeqr
Qamar Qamar
QeQ QeQ
QEq QEq
QEqR
qfactor qfactor
qfile qfile
qi qi

View File

@ -0,0 +1,29 @@
# Water with QEqR
boundary p p p
units real
atom_style charge
read_data data.water
variable x index 1
variable y index 1
variable z index 1
replicate $x $y $z
pair_style reaxff NULL safezone 3.0 mincap 150
pair_coeff * * qeq_ff.water O H
neighbor 0.5 bin
neigh_modify every 1 delay 0 check yes
velocity all create 300.0 4928459 rot yes dist gaussian
fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt
fix 2 all nvt temp 300 300 50.0
timestep 0.5
thermo 10
thermo_style custom step temp press density vol
run 20

View File

@ -0,0 +1,30 @@
# Water with QEqR
boundary p p p
units real
atom_style charge
read_data data.water
variable x index 1
variable y index 1
variable z index 1
replicate $x $y $z
pair_style reaxff NULL safezone 3.0 mincap 150
pair_coeff * * qeq_ff.water O H
neighbor 0.5 bin
neigh_modify every 1 delay 0 check yes
velocity all create 300.0 4928459 rot yes dist gaussian
fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt
fix 2 all nvt temp 300 300 50.0
fix 3 all efield 0.0 0.0 0.05
timestep 0.5
thermo 10
thermo_style custom step temp press density vol
run 20

View File

@ -0,0 +1,115 @@
LAMMPS (4 Feb 2025 - Development - patch_4Feb2025-444-gbb8b6590d5-modified)
using 1 OpenMP thread(s) per MPI task
# Water with QEqR
boundary p p p
units real
atom_style charge
read_data data.water
Reading data file ...
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
1 by 1 by 1 MPI processor grid
reading atoms ...
3000 atoms
read_data CPU = 0.053 seconds
variable x index 1
variable y index 1
variable z index 1
replicate $x $y $z
replicate 1 $y $z
replicate 1 1 $z
replicate 1 1 1
Replication is creating a 1x1x1 = 1 times larger system...
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
1 by 1 by 1 MPI processor grid
3000 atoms
replicate CPU = 0.001 seconds
pair_style reaxff NULL safezone 3.0 mincap 150
pair_coeff * * qeq_ff.water O H
WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:300)
neighbor 0.5 bin
neigh_modify every 1 delay 0 check yes
velocity all create 300.0 4928459 rot yes dist gaussian
fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt
fix 2 all nvt temp 300 300 50.0
fix 3 all efield 0.0 0.0 0.05
timestep 0.5
thermo 10
thermo_style custom step temp press density vol
run 20
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- pair reaxff command: doi:10.1016/j.parco.2011.08.005
@Article{Aktulga12,
author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama},
title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques},
journal = {Parallel Computing},
year = 2012,
volume = 38,
number = {4--5},
pages = {245--259}
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 10.5
ghost atom cutoff = 10.5
binsize = 5.25, bins = 6 6 6
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reaxff, perpetual
attributes: half, newton off, ghost
pair build: half/bin/ghost/newtoff
stencil: full/ghost/bin/3d
bin: standard
(2) fix qeqr/reaxff, perpetual, copy from (1)
attributes: half, newton off
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 539.2 | 539.2 | 539.2 Mbytes
Step Temp Press Density Volume
0 300 778.75601 1 29915.273
10 301.42845 5423.6612 1 29915.273
20 298.24707 1549.2257 1 29915.273
Loop time of 10.6859 on 1 procs for 20 steps with 3000 atoms
Performance: 0.081 ns/day, 296.830 hours/ns, 1.872 timesteps/s, 5.615 katom-step/s
100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 4.7595 | 4.7595 | 4.7595 | 0.0 | 44.54
Neigh | 0.17605 | 0.17605 | 0.17605 | 0.0 | 1.65
Comm | 0.0017511 | 0.0017511 | 0.0017511 | 0.0 | 0.02
Output | 8.3809e-05 | 8.3809e-05 | 8.3809e-05 | 0.0 | 0.00
Modify | 5.748 | 5.748 | 5.748 | 0.0 | 53.79
Other | | 0.0005279 | | | 0.00
Nlocal: 3000 ave 3000 max 3000 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 11075 ave 11075 max 11075 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 971785 ave 971785 max 971785 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 971785
Ave neighs/atom = 323.92833
Neighbor list builds = 2
Dangerous builds = 0
Total wall time: 0:00:12

View File

@ -0,0 +1,115 @@
LAMMPS (4 Feb 2025 - Development - patch_4Feb2025-444-gbb8b6590d5-modified)
using 1 OpenMP thread(s) per MPI task
# Water with QEqR
boundary p p p
units real
atom_style charge
read_data data.water
Reading data file ...
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
1 by 2 by 2 MPI processor grid
reading atoms ...
3000 atoms
read_data CPU = 0.053 seconds
variable x index 1
variable y index 1
variable z index 1
replicate $x $y $z
replicate 1 $y $z
replicate 1 1 $z
replicate 1 1 1
Replication is creating a 1x1x1 = 1 times larger system...
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
1 by 2 by 2 MPI processor grid
3000 atoms
replicate CPU = 0.002 seconds
pair_style reaxff NULL safezone 3.0 mincap 150
pair_coeff * * qeq_ff.water O H
WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:300)
neighbor 0.5 bin
neigh_modify every 1 delay 0 check yes
velocity all create 300.0 4928459 rot yes dist gaussian
fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt
fix 2 all nvt temp 300 300 50.0
fix 3 all efield 0.0 0.0 0.05
timestep 0.5
thermo 10
thermo_style custom step temp press density vol
run 20
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- pair reaxff command: doi:10.1016/j.parco.2011.08.005
@Article{Aktulga12,
author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama},
title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques},
journal = {Parallel Computing},
year = 2012,
volume = 38,
number = {4--5},
pages = {245--259}
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 10.5
ghost atom cutoff = 10.5
binsize = 5.25, bins = 6 6 6
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reaxff, perpetual
attributes: half, newton off, ghost
pair build: half/bin/ghost/newtoff
stencil: full/ghost/bin/3d
bin: standard
(2) fix qeqr/reaxff, perpetual, copy from (1)
attributes: half, newton off
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 260.5 | 262.2 | 263.6 Mbytes
Step Temp Press Density Volume
0 300 778.75601 1 29915.273
10 301.42845 5423.6623 1 29915.273
20 298.24708 1549.2264 1 29915.273
Loop time of 3.10467 on 4 procs for 20 steps with 3000 atoms
Performance: 0.278 ns/day, 86.241 hours/ns, 6.442 timesteps/s, 19.326 katom-step/s
99.6% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 1.668 | 1.6843 | 1.7266 | 1.9 | 54.25
Neigh | 0.08549 | 0.086004 | 0.086638 | 0.2 | 2.77
Comm | 0.0135 | 0.055821 | 0.072105 | 10.4 | 1.80
Output | 4.9632e-05 | 5.4515e-05 | 6.8384e-05 | 0.0 | 0.00
Modify | 1.2774 | 1.2781 | 1.2786 | 0.0 | 41.17
Other | | 0.000458 | | | 0.01
Nlocal: 750 ave 760 max 735 min
Histogram: 1 0 0 0 1 0 0 0 0 2
Nghost: 6230.75 ave 6255 max 6191 min
Histogram: 1 0 0 0 0 1 0 0 1 1
Neighs: 276996 ave 280553 max 271385 min
Histogram: 1 0 0 0 0 1 0 0 0 2
Total # of neighbors = 1107985
Ave neighs/atom = 369.32833
Neighbor list builds = 2
Dangerous builds = 0
Total wall time: 0:00:03

View File

@ -0,0 +1,116 @@
LAMMPS (4 Feb 2025 - Development - patch_4Feb2025-444-gbb8b6590d5-modified)
using 1 OpenMP thread(s) per MPI task
# Water with QEqR
boundary p p p
units real
atom_style charge
read_data data.water
Reading data file ...
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
1 by 1 by 1 MPI processor grid
reading atoms ...
3000 atoms
read_data CPU = 0.055 seconds
variable x index 1
variable y index 1
variable z index 1
replicate $x $y $z
replicate 1 $y $z
replicate 1 1 $z
replicate 1 1 1
Replication is creating a 1x1x1 = 1 times larger system...
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
1 by 1 by 1 MPI processor grid
3000 atoms
replicate CPU = 0.001 seconds
pair_style reaxff NULL safezone 3.0 mincap 150
pair_coeff * * qeq_ff.water O H
WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:300)
neighbor 0.5 bin
neigh_modify every 1 delay 0 check yes
velocity all create 300.0 4928459 rot yes dist gaussian
fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt
fix 2 all nvt temp 300 300 50.0
timestep 0.5
thermo 10
thermo_style custom step temp press density vol
run 20
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- pair reaxff command: doi:10.1016/j.parco.2011.08.005
@Article{Aktulga12,
author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama},
title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques},
journal = {Parallel Computing},
year = 2012,
volume = 38,
number = {4--5},
pages = {245--259}
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
WARNING: Use fix qeq/reaxff instead of fix qeqr/reaxff when not using fix efield
(src/REAXFF/fix_qtpie_reaxff.cpp:493)
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 10.5
ghost atom cutoff = 10.5
binsize = 5.25, bins = 6 6 6
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reaxff, perpetual
attributes: half, newton off, ghost
pair build: half/bin/ghost/newtoff
stencil: full/ghost/bin/3d
bin: standard
(2) fix qeqr/reaxff, perpetual, copy from (1)
attributes: half, newton off
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 539.2 | 539.2 | 539.2 Mbytes
Step Temp Press Density Volume
0 300 780.33989 1 29915.273
10 301.29205 5433.7414 1 29915.273
20 297.90652 1572.6111 1 29915.273
Loop time of 6.87447 on 1 procs for 20 steps with 3000 atoms
Performance: 0.126 ns/day, 190.957 hours/ns, 2.909 timesteps/s, 8.728 katom-step/s
100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 4.8461 | 4.8461 | 4.8461 | 0.0 | 70.49
Neigh | 0.17595 | 0.17595 | 0.17595 | 0.0 | 2.56
Comm | 0.001787 | 0.001787 | 0.001787 | 0.0 | 0.03
Output | 8.5794e-05 | 8.5794e-05 | 8.5794e-05 | 0.0 | 0.00
Modify | 1.8501 | 1.8501 | 1.8501 | 0.0 | 26.91
Other | | 0.0004811 | | | 0.01
Nlocal: 3000 ave 3000 max 3000 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 11077 ave 11077 max 11077 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 971826 ave 971826 max 971826 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 971826
Ave neighs/atom = 323.942
Neighbor list builds = 2
Dangerous builds = 0
Total wall time: 0:00:07

View File

@ -0,0 +1,116 @@
LAMMPS (4 Feb 2025 - Development - patch_4Feb2025-444-gbb8b6590d5-modified)
using 1 OpenMP thread(s) per MPI task
# Water with QEqR
boundary p p p
units real
atom_style charge
read_data data.water
Reading data file ...
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
1 by 2 by 2 MPI processor grid
reading atoms ...
3000 atoms
read_data CPU = 0.082 seconds
variable x index 1
variable y index 1
variable z index 1
replicate $x $y $z
replicate 1 $y $z
replicate 1 1 $z
replicate 1 1 1
Replication is creating a 1x1x1 = 1 times larger system...
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
1 by 2 by 2 MPI processor grid
3000 atoms
replicate CPU = 0.002 seconds
pair_style reaxff NULL safezone 3.0 mincap 150
pair_coeff * * qeq_ff.water O H
WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:300)
neighbor 0.5 bin
neigh_modify every 1 delay 0 check yes
velocity all create 300.0 4928459 rot yes dist gaussian
fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt
fix 2 all nvt temp 300 300 50.0
timestep 0.5
thermo 10
thermo_style custom step temp press density vol
run 20
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
Your simulation uses code contributions which should be cited:
- pair reaxff command: doi:10.1016/j.parco.2011.08.005
@Article{Aktulga12,
author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama},
title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques},
journal = {Parallel Computing},
year = 2012,
volume = 38,
number = {4--5},
pages = {245--259}
}
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
WARNING: Use fix qeq/reaxff instead of fix qeqr/reaxff when not using fix efield
(src/REAXFF/fix_qtpie_reaxff.cpp:493)
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 10.5
ghost atom cutoff = 10.5
binsize = 5.25, bins = 6 6 6
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reaxff, perpetual
attributes: half, newton off, ghost
pair build: half/bin/ghost/newtoff
stencil: full/ghost/bin/3d
bin: standard
(2) fix qeqr/reaxff, perpetual, copy from (1)
attributes: half, newton off
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 260.5 | 262.2 | 263.6 Mbytes
Step Temp Press Density Volume
0 300 780.34006 1 29915.273
10 301.29205 5433.7414 1 29915.273
20 297.90652 1572.6112 1 29915.273
Loop time of 2.52349 on 4 procs for 20 steps with 3000 atoms
Performance: 0.342 ns/day, 70.097 hours/ns, 7.926 timesteps/s, 23.777 katom-step/s
99.0% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 1.7081 | 1.7518 | 1.7812 | 2.3 | 69.42
Neigh | 0.10017 | 0.10116 | 0.10315 | 0.4 | 4.01
Comm | 0.014848 | 0.044256 | 0.087941 | 14.7 | 1.75
Output | 5.1199e-05 | 5.663e-05 | 7.1837e-05 | 0.0 | 0.00
Modify | 0.62379 | 0.62575 | 0.62671 | 0.1 | 24.80
Other | | 0.000504 | | | 0.02
Nlocal: 750 ave 759 max 735 min
Histogram: 1 0 0 0 0 1 0 0 0 2
Nghost: 6230.5 ave 6256 max 6190 min
Histogram: 1 0 0 0 0 1 0 0 1 1
Neighs: 277008 ave 280943 max 271394 min
Histogram: 1 0 0 0 0 1 0 0 1 1
Total # of neighbors = 1108032
Ave neighs/atom = 369.344
Neighbor list builds = 2
Dangerous builds = 0
Total wall time: 0:00:02

View File

@ -1,5 +1,72 @@
# CHANGELOG # CHANGELOG
## 4.6.00
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.01...4.6.00)
### Features:
* Kokkos::Graph: Allow adding tasks to the graph via a `then`-node [\#7629](https://github.com/kokkos/kokkos/pull/7629)
* Kokkos::Graph: Allow construction from CUDA/HIP graph [\#7664](https://github.com/kokkos/kokkos/pull/7664)
* HIP: Add experimental support for using multiple GPUs from one process [\#7130](https://github.com/kokkos/kokkos/pull/7130)
### Backend and Architecture Enhancements:
#### CUDA:
* Improved reduction performance, in particular on H100 and newer [\#7823](https://github.com/kokkos/kokkos/pull/7823)
#### HIP:
* Change block size deduction to prefer smaller blocks/teams [\#7509](https://github.com/kokkos/kokkos/pull/7509)
* Allocate memory with stream ordered semantics (i.e. use `hipMallocAsync`) [\#7659](https://github.com/kokkos/kokkos/pull/7659)
* Fix a segfault when a virtual function called inside a kernel requires too many registers [\#7660](https://github.com/kokkos/kokkos/pull/7660)
#### SYCL:
* Improve sorting performance for non-contiguous views [\#7502](https://github.com/kokkos/kokkos/pull/7502)
#### Serial:
* Reduce fences overhead when using `Kokkos_ENABLE_ATOMICS_BYPASS` [\#7821](https://github.com/kokkos/kokkos/pull/7821)
### General Enhancements
* Allow use of `kokkos_check` in `<PackageName>Config.cmake` without warnings [\#7669](https://github.com/kokkos/kokkos/pull/7669)
* Add simd compound assignments and update simd reductions [\#7486](https://github.com/kokkos/kokkos/pull/7486)
* Improve performance of the `inclusive_scan` algorithm with Cuda and HIP [\#7542](https://github.com/kokkos/kokkos/pull/7542)
* Reduce tooling interface overhead (don't pay for what you don't use) [\#7817](https://github.com/kokkos/kokkos/pull/7817)
* Avoid storing the view in `RandomAccessIterator` to increase performance [\#7304](https://github.com/kokkos/kokkos/pull/7304)
* Make `RandomAccessIterator` fulfill `std::random_access_iterator` concept [\#7451](https://github.com/kokkos/kokkos/pull/7451)
* Include information about support for system allocated memory in `print_configuration` (Cuda and HIP) [\#7673](https://github.com/kokkos/kokkos/pull/7673)
### Build System Changes
* Add support for Zen 4 AMD microarchitecture [\#7550](https://github.com/kokkos/kokkos/pull/7550)
* Enable NVIDIA Grace architecture with NVHPC [\#7858](https://github.com/kokkos/kokkos/pull/7858)
* Support static library builds when using CUDA as CMake language [\#7830](https://github.com/kokkos/kokkos/pull/7830)
### Incompatibilities (i.e. breaking changes)
* Change SIMD comparison operator to return `simd_mask` instead of `bool` [\#7781](https://github.com/kokkos/kokkos/pull/7781)
* Remove classic Intel compiler (icpc) support [\#7737](https://github.com/kokkos/kokkos/pull/7737)
* Remove `operator[]` overloads of Kokkos `basic_simd` and `basic_simd_mask` that return a reference [\#7630](https://github.com/kokkos/kokkos/pull/7630)
### Deprecations
* Deprecate `StaticCrsGraph` and move it to Kokkos Kernels into `KokkosSparse::` [\#7516](https://github.com/kokkos/kokkos/pull/7516)
* Deprecate `native_simd` and hide `simd_abi` [\#7472](https://github.com/kokkos/kokkos/pull/7472)
* Deprecate Makefile support [\#7613](https://github.com/kokkos/kokkos/pull/7613)
* DualView: Deprecate direct access to d_view and h_view [\#7716](https://github.com/kokkos/kokkos/pull/7716)
### Bug Fixes
* Fix performance bug affecting `atomic_fetch_{add,sub,min,max,and,or,xor}` on integral types `long` and `unsigned long` with HIP [\#7816](https://github.com/kokkos/kokkos/pull/7816)
* Fix execution of ranges with more than 2B elements [\#7797](https://github.com/kokkos/kokkos/pull/7797)
* Fix clean target when embedding Kokkos in another project [\#7557](https://github.com/kokkos/kokkos/pull/7557)
* Fix Zen3 flag for NVHPC [\#7558](https://github.com/kokkos/kokkos/pull/7558)
* graph: nodes must be stored by the graph [\#7619](https://github.com/kokkos/kokkos/pull/7619)
* Make sure lock arrays are on device before launching a graph [\#7685](https://github.com/kokkos/kokkos/pull/7685)
* Performance bug in `RangePolicy`: construct error message if and only if the precondition is violated [\#7809](https://github.com/kokkos/kokkos/pull/7809)
* simd: fix a bug in scalar min/max [\#7813](https://github.com/kokkos/kokkos/pull/7813)
* simd: fix a bug in non-masked reductions [\#7845](https://github.com/kokkos/kokkos/pull/7845)
* Cuda: fix incorrect iteration in `MDRangePolicy` of rank > 4 for high iteration counts [\#7724](https://github.com/kokkos/kokkos/pull/7724)
* Cuda: ignore gcc assembler options in `nvcc-wrapper` [\#7492](https://github.com/kokkos/kokkos/pull/7492)
* Build system: hint to `ARCH_NATIVE` if ARMv9 Grace arch is not explicitly supported by the compiler [\#7862](https://github.com/kokkos/kokkos/pull/7862)
* Use right arch for MI300A in makefiles [\#7786](https://github.com/kokkos/kokkos/pull/7786)
* Fix compiling BasicView on MSVC [\#7751](https://github.com/kokkos/kokkos/pull/7751)
## 4.5.01 ## 4.5.01
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.00...4.5.01) [Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.00...4.5.01)

View File

@ -148,8 +148,8 @@ elseif(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
endif() endif()
set(Kokkos_VERSION_MAJOR 4) set(Kokkos_VERSION_MAJOR 4)
set(Kokkos_VERSION_MINOR 5) set(Kokkos_VERSION_MINOR 6)
set(Kokkos_VERSION_PATCH 1) set(Kokkos_VERSION_PATCH 0)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
message(STATUS "Kokkos version: ${Kokkos_VERSION}") message(STATUS "Kokkos version: ${Kokkos_VERSION}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")

View File

@ -0,0 +1,4 @@
set(CTEST_PROJECT_NAME Kokkos)
set(CTEST_NIGHTLY_START_TIME 01:00:00 UTC)
set(CTEST_SUBMIT_URL https://my.cdash.org/submit.php?project=Kokkos)
set(CTEST_DROP_SITE_CDASH TRUE)

View File

@ -1,6 +1,9 @@
# Default settings common options. # Default settings common options.
#SPARTA specific settings: #LAMMPS specific settings:
KOKKOS_USE_DEPRECATED_MAKEFILES=1
ifndef KOKKOS_PATH ifndef KOKKOS_PATH
KOKKOS_PATH=../../lib/kokkos KOKKOS_PATH=../../lib/kokkos
endif endif
@ -10,9 +13,14 @@ ifeq ($(mode),shared)
CXXFLAGS += $(SHFLAGS) CXXFLAGS += $(SHFLAGS)
endif endif
ifneq ($(KOKKOS_USE_DEPRECATED_MAKEFILES), 1)
$(error Makefile support is deprecated. Only CMake builds will be supported from Kokkos 5 on. Set KOKKOS_USE_DEPRECATED_MAKEFILES=1 to silence this error.)
endif
KOKKOS_VERSION_MAJOR = 4 KOKKOS_VERSION_MAJOR = 4
KOKKOS_VERSION_MINOR = 5 KOKKOS_VERSION_MINOR = 6
KOKKOS_VERSION_PATCH = 1 KOKKOS_VERSION_PATCH = 0
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
@ -24,7 +32,7 @@ KOKKOS_DEVICES ?= "OpenMP"
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
# IBM: Power8,Power9 # IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX942_APU,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 # AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX942_APU,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3,Zen4
# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC # Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC
KOKKOS_ARCH ?= "" KOKKOS_ARCH ?= ""
# Options: yes,no # Options: yes,no
@ -442,13 +450,16 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
# AMD based. # AMD based.
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX) KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
KOKKOS_INTERNAL_USE_ARCH_ZEN4 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen4)
KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3) KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3)
KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2) KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN4), 0)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 0) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 0)
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen) KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
endif endif
endif endif
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 0) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 0)
@ -463,8 +474,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A)
endif endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942_APU) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942_APU)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030)
@ -857,6 +870,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1)
endif endif
endif endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN4), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN4")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xCORE-AVX512
KOKKOS_LDFLAGS += -xCORE-AVX512
else
KOKKOS_CXXFLAGS += -march=znver4 -mtune=znver4
KOKKOS_LDFLAGS += -march=znver4 -mtune=znver4
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX")

View File

@ -18,24 +18,24 @@ Kokkos is a [Linux Foundation](https://linuxfoundation.org) project.
To start learning about Kokkos: To start learning about Kokkos:
- [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities. - [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/tutorials-and-examples/video-lectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities.
- [Programming guide](https://kokkos.org/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. - [Programming guide](https://kokkos.org/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch.
- [API reference](https://kokkos.org/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.org/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.org/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.org/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.org/kokkos-core-wiki/API/alphabetical.html). - [API reference](https://kokkos.org/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.org/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.org/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.org/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.org/kokkos-core-wiki/API/alphabetical.html).
- [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability. - [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/tutorials-and-examples/use-cases-and-examples.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
## Obtaining Kokkos ## Obtaining Kokkos
The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest). The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest).
The current release is [4.5.01](https://github.com/kokkos/kokkos/releases/tag/4.5.01). The current release is [4.6.00](https://github.com/kokkos/kokkos/releases/tag/4.6.00).
```bash ```bash
curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.6.00/kokkos-4.6.00.tar.gz
# Or with wget # Or with wget
wget https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz wget https://github.com/kokkos/kokkos/releases/download/4.6.00/kokkos-4.6.00.tar.gz
``` ```
To clone the latest development version of Kokkos from GitHub: To clone the latest development version of Kokkos from GitHub:
@ -47,7 +47,7 @@ git clone -b develop https://github.com/kokkos/kokkos.git
### Building Kokkos ### Building Kokkos
To build Kokkos, you will need to have a C++ compiler that supports C++17 or later. To build Kokkos, you will need to have a C++ compiler that supports C++17 or later.
All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/requirements.html). All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/get-started/requirements.html).
Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/building.html). Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/building.html).

View File

@ -5,3 +5,7 @@ endif()
if(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC)) if(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC))
kokkos_add_test_directories(unit_tests) kokkos_add_test_directories(unit_tests)
endif() endif()
if(Kokkos_ENABLE_BENCHMARKS)
add_subdirectory(perf_test)
endif()

View File

@ -0,0 +1,63 @@
# FIXME: The following logic should be moved from here and also from `core/perf_test/CMakeLists.txt` to
# the root `CMakeLists.txt` in the form of a macro
# Find or download google/benchmark library
# First look for an installed google/benchmark package, passing 1.5.6 as the
# requested version. QUIET keeps find_package silent so that the status
# messages below are the only report of the outcome.
find_package(benchmark QUIET 1.5.6)
if(benchmark_FOUND)
message(STATUS "Using google benchmark found in ${benchmark_DIR}")
else()
# Fallback: download a pinned release and build it as part of this project.
message(STATUS "No installed google benchmark found, fetching from GitHub")
include(FetchContent)
# Turn off google/benchmark's own BENCHMARK_ENABLE_TESTING option before it is configured.
set(BENCHMARK_ENABLE_TESTING OFF)
# Prefix every message printed while the fetched project configures with
# "[benchmark] "; the prefix is removed again by the POP_BACK below.
list(APPEND CMAKE_MESSAGE_INDENT "[benchmark] ")
# Pinned v1.7.1 release tarball, verified against the MD5 hash given here.
FetchContent_Declare(
googlebenchmark
DOWNLOAD_EXTRACT_TIMESTAMP FALSE
URL https://github.com/google/benchmark/archive/refs/tags/v1.7.1.tar.gz
URL_HASH MD5=0459a6c530df9851bee6504c3e37c2e7
)
FetchContent_MakeAvailable(googlebenchmark)
list(POP_BACK CMAKE_MESSAGE_INDENT)
# Suppress clang-tidy diagnostics on code that we do not have control over
if(CMAKE_CXX_CLANG_TIDY)
set_target_properties(benchmark PROPERTIES CXX_CLANG_TIDY "")
endif()
# FIXME: Check whether the following target_compile_options are needed.
# If so, clarify why.
# NOTE(review): -w is presumably meant to silence all compiler warnings coming
# from the fetched benchmark targets; confirm for every supported compiler.
target_compile_options(benchmark PRIVATE -w)
target_compile_options(benchmark_main PRIVATE -w)
endif()
# FIXME: This function should be moved from here and also from `core/perf_test/CMakeLists.txt` to
# the root `CMakeLists.txt`
# FIXME: Could NAME be a one_value_keyword specified in cmake_parse_arguments?
#
# kokkos_add_benchmark(<NAME> SOURCES <source>...)
#
# Builds the executable `Kokkos_<NAME>` from the listed sources plus the shared
# BenchmarkMain.cpp / Benchmark_Context.cpp from core/perf_test, links it
# against google benchmark, Kokkos and impl_git_version, and registers it with
# add_test under the same name. Arguments other than SOURCES only trigger a
# warning.
function(KOKKOS_ADD_BENCHMARK NAME)
# Only the multi-value keyword SOURCES is recognized; results land in BENCHMARK_SOURCES.
cmake_parse_arguments(BENCHMARK "" "" "SOURCES" ${ARGN})
if(DEFINED BENCHMARK_UNPARSED_ARGUMENTS)
message(WARNING "Unexpected arguments when adding a benchmark: " ${BENCHMARK_UNPARSED_ARGUMENTS})
endif()
set(BENCHMARK_NAME Kokkos_${NAME})
# FIXME: BenchmarkMain.cpp and Benchmark_Context.cpp should be moved to a common location from which
# they can be used by all performance tests.
list(APPEND BENCHMARK_SOURCES ../../core/perf_test/BenchmarkMain.cpp ../../core/perf_test/Benchmark_Context.cpp)
add_executable(${BENCHMARK_NAME} ${BENCHMARK_SOURCES})
target_link_libraries(${BENCHMARK_NAME} PRIVATE benchmark::benchmark Kokkos::kokkos impl_git_version)
# NOTE(review): benchmark_SOURCE_DIR is presumably only set when google
# benchmark was fetched and built in-tree; confirm what this expands to when an
# installed package was found instead.
target_include_directories(${BENCHMARK_NAME} SYSTEM PRIVATE ${benchmark_SOURCE_DIR}/include)
# FIXME: This alone will not work. It might need an architecture and standard which need to be defined on target level.
# It will potentially go away with #7582.
# Compile every source of the benchmark as KOKKOS_COMPILE_LANGUAGE.
foreach(SOURCE_FILE ${BENCHMARK_SOURCES})
set_source_files_properties(${SOURCE_FILE} PROPERTIES LANGUAGE ${KOKKOS_COMPILE_LANGUAGE})
endforeach()
# The timestamp is evaluated when CMake configures the project, so the JSON
# output file name is fixed per configure run, not per benchmark run.
string(TIMESTAMP BENCHMARK_TIME "%Y-%m-%d_T%H-%M-%S" UTC)
set(BENCHMARK_ARGS --benchmark_counters_tabular=true --benchmark_out=${BENCHMARK_NAME}_${BENCHMARK_TIME}.json)
add_test(NAME ${BENCHMARK_NAME} COMMAND ${BENCHMARK_NAME} ${BENCHMARK_ARGS})
endfunction()
kokkos_add_benchmark(PerformanceTest_InclusiveScan SOURCES test_inclusive_scan.cpp)

View File

@ -0,0 +1,191 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#include <cstddef>
#include <cstdint>
#include <tuple>
#include <type_traits>
#include <benchmark/benchmark.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_Timer.hpp>
#include <Kokkos_StdAlgorithms.hpp>
// FIXME: Benchmark_Context.hpp should be moved to a common location
#include "../../core/perf_test/Benchmark_Context.hpp"
namespace {
namespace KE = Kokkos::Experimental;
using ExecSpace = Kokkos::DefaultExecutionSpace;
using HostExecSpace = Kokkos::DefaultHostExecutionSpace;
// A tag struct to identify when inclusive scan with the implicit sum
// based binary operation needs to be called.
template <class ValueType>
struct ImpSumBinOp;
// Binary operation for the scan: yields the sum of its two operands.
template <class ValueType>
struct SumFunctor {
  KOKKOS_FUNCTION
  ValueType operator()(const ValueType& lhs, const ValueType& rhs) const {
    return lhs + rhs;
  }
};
// Binary operation for the scan: yields the larger of its two operands.
// When the first operand is not strictly greater, the second one is returned.
template <class ValueType>
struct MaxFunctor {
  KOKKOS_FUNCTION
  ValueType operator()(const ValueType& lhs, const ValueType& rhs) const {
    return (lhs > rhs) ? lhs : rhs;
  }
};
// Copies the final entry of the rank-1 view `v` into a host-side scalar and
// returns it. The view is expected to hold at least one element.
template <class T>
T obtain_last_elem(const Kokkos::View<T*, ExecSpace>& v) {
  const auto last_index = v.extent(0) - 1;
  T result;
  Kokkos::deep_copy(result, Kokkos::subview(v, last_index));
  return result;
}
// Helper to allocate input and output views
template <class T>
auto prepare_views(const std::size_t kProbSize) {
Kokkos::View<T*, ExecSpace> in{"input", kProbSize};
Kokkos::View<T*, ExecSpace> out{"output", kProbSize};
auto h_in = Kokkos::create_mirror_view(in);
for (std::size_t i = 0; i < kProbSize; ++i) {
h_in(i) = i;
}
Kokkos::deep_copy(in, h_in);
return std::make_tuple(in, out, h_in);
}
// Perform scan with a reference implementation
//
// Serially folds `scan_functor` over the elements of `h_in` in index order and
// returns the value an inclusive scan stores in its last output position.
//
// Only that final value is needed by the caller, so a running accumulator is
// used instead of materializing a full-size output view (which would cost as
// much memory as the input itself).
//
// h_in:         rank-1 view-like object providing extent(0) and operator()(i).
// scan_functor: binary operation, invoked as scan_functor(h_in(i), previous).
// Returns:      the last element of the inclusive scan, or a value-initialized
//               T when `h_in` is empty (there is no last element to report).
template <class T, class ViewType, class ScanFunctor = SumFunctor<T>>
T ref_scan(const ViewType& h_in, ScanFunctor scan_functor = ScanFunctor()) {
  const std::size_t view_size = h_in.extent(0);

  // Guard against reading h_in(0) / index (view_size - 1) of an empty view.
  if (view_size == 0) {
    return T{};
  }

  // FIXME: We have GCC 8.4.0 based check in our ORNL Jenkins CI.
  // std::inclusive_scan is available only from GCC 9.3. Since GCC 9.1, the
  // std::inclusive_scan overload that takes an execution policy is available.
  // However, there is an error with the <execution> header before GCC 10.1.
  T running = h_in(0);
  for (std::size_t i = 1; i < view_size; ++i) {
    running = scan_functor(h_in(i), running);
  }

  return running;
}
// Runs one timed inclusive scan from `in` into `out`.
//
// When ScanFunctor is the ImpSumBinOp tag, the overload without a binary
// operation is called; otherwise a default-constructed ScanFunctor<T> is
// passed as the binary operation.
// Note: The nature of the functor must be compatible with the
// elements in the input and output views
//
// The execution space is fenced before the timer starts and again before it
// is read. Returns the tuple (elapsed seconds, whether the last element of
// `out` equals `res_check`).
template <class T, template <class> class ScanFunctor = ImpSumBinOp>
auto inclusive_scan(const Kokkos::View<T*, ExecSpace>& in,
                    const Kokkos::View<T*, ExecSpace>& out, T res_check) {
  constexpr bool use_default_op =
      std::is_same_v<ScanFunctor<T>, ImpSumBinOp<T>>;

  ExecSpace().fence();
  Kokkos::Timer stopwatch;

  if constexpr (use_default_op) {
    KE::inclusive_scan("Default scan", ExecSpace(), KE::cbegin(in),
                       KE::cend(in), KE::begin(out));
  } else {
    KE::inclusive_scan("Scan using a functor", ExecSpace(), KE::cbegin(in),
                       KE::cend(in), KE::begin(out), ScanFunctor<T>());
  }

  ExecSpace().fence();
  const double elapsed = stopwatch.seconds();

  const T last_value = obtain_last_elem(out);
  const bool matches = (res_check == last_value);

  return std::make_tuple(elapsed, matches);
}
// Benchmark body: inclusive scan with the default binary operation (sum) or
// with a user provided functor.
//
// The problem size is taken from state.range(0). The expected last element is
// computed once with the reference scan; every benchmark iteration then runs
// the timed scan, reports the measured time through
// KokkosBenchmark::report_results and stores the comparison outcome in the
// "Passed" counter.
template <class T, template <class> class ScanFunctor = ImpSumBinOp>
void BM_inclusive_scan(benchmark::State& state) {
  const std::size_t kProbSize = state.range(0);

  auto [d_in, d_out, h_in] = prepare_views<T>(kProbSize);

  // The ImpSumBinOp tag has no definition, so the reference scan falls back
  // to its own default functor in that case.
  T expected;
  if constexpr (std::is_same_v<ScanFunctor<T>, ImpSumBinOp<T>>) {
    expected = ref_scan<T>(h_in);
  } else {
    expected = ref_scan<T>(h_in, ScanFunctor<T>());
  }

  for (auto _ : state) {
    // With ScanFunctor == ImpSumBinOp this names the same instantiation as
    // inclusive_scan<T>, so a single call covers both cases.
    const auto [elapsed, ok] =
        inclusive_scan<T, ScanFunctor>(d_in, d_out, expected);

    KokkosBenchmark::report_results(state, d_in, 2, elapsed);
    state.counters["Passed"] = ok;
  }
}
constexpr std::size_t PROB_SIZE = 100'000'000;
} // anonymous namespace
// FIXME: Add logic to pass min. warm-up time. Also, the value should be set
// by the user. Say, via the environment variable BENCHMARK_MIN_WARMUP_TIME.
//
// Registered cases: each value type (std::uint64_t, std::int64_t, double) is
// benchmarked with the default binary operation, with SumFunctor and with
// MaxFunctor. Every case runs with the single problem size PROB_SIZE and is
// registered with UseManualTime().
// NOTE(review): manual timing presumably relies on
// KokkosBenchmark::report_results forwarding the measured time to the
// benchmark state; confirm in Benchmark_Context.hpp.

// Default binary operation (sum).
BENCHMARK(BM_inclusive_scan<std::uint64_t>)->Arg(PROB_SIZE)->UseManualTime();
BENCHMARK(BM_inclusive_scan<std::int64_t>)->Arg(PROB_SIZE)->UseManualTime();
BENCHMARK(BM_inclusive_scan<double>)->Arg(PROB_SIZE)->UseManualTime();
// Explicit sum functor.
BENCHMARK(BM_inclusive_scan<std::uint64_t, SumFunctor>)
->Arg(PROB_SIZE)
->UseManualTime();
BENCHMARK(BM_inclusive_scan<std::int64_t, SumFunctor>)
->Arg(PROB_SIZE)
->UseManualTime();
BENCHMARK(BM_inclusive_scan<double, SumFunctor>)
->Arg(PROB_SIZE)
->UseManualTime();
// Max functor.
BENCHMARK(BM_inclusive_scan<std::uint64_t, MaxFunctor>)
->Arg(PROB_SIZE)
->UseManualTime();
BENCHMARK(BM_inclusive_scan<std::int64_t, MaxFunctor>)
->Arg(PROB_SIZE)
->UseManualTime();
BENCHMARK(BM_inclusive_scan<double, MaxFunctor>)
->Arg(PROB_SIZE)
->UseManualTime();

View File

@ -587,11 +587,13 @@ struct Random_XorShift1024_State<false> {
int state_idx) int state_idx)
: state_(&v(state_idx, 0)), stride_(v.stride_1()) {} : state_(&v(state_idx, 0)), stride_(v.stride_1()) {}
// NOLINTBEGIN(bugprone-implicit-widening-of-multiplication-result)
KOKKOS_FUNCTION KOKKOS_FUNCTION
uint64_t operator[](const int i) const { return state_[i * stride_]; } uint64_t operator[](const int i) const { return state_[i * stride_]; }
KOKKOS_FUNCTION KOKKOS_FUNCTION
uint64_t& operator[](const int i) { return state_[i * stride_]; } uint64_t& operator[](const int i) { return state_[i * stride_]; }
// NOLINTEND(bugprone-implicit-widening-of-multiplication-result)
}; };
template <class ExecutionSpace> template <class ExecutionSpace>
@ -670,7 +672,12 @@ struct Random_UniqueIndex<Kokkos::Device<Kokkos::SYCL, MemorySpace>> {
View<int**, Kokkos::Device<Kokkos::SYCL, MemorySpace>>; View<int**, Kokkos::Device<Kokkos::SYCL, MemorySpace>>;
KOKKOS_FUNCTION KOKKOS_FUNCTION
static int get_state_idx(const locks_view_type& locks_) { static int get_state_idx(const locks_view_type& locks_) {
#if defined(KOKKOS_COMPILER_INTEL_LLVM) && \
KOKKOS_COMPILER_INTEL_LLVM >= 20250000
auto item = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
#else
auto item = sycl::ext::oneapi::experimental::this_nd_item<3>(); auto item = sycl::ext::oneapi::experimental::this_nd_item<3>();
#endif
std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1), std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1),
item.get_local_id(0)}; item.get_local_id(0)};
std::size_t blockIdx[3] = {item.get_group(2), item.get_group(1), std::size_t blockIdx[3] = {item.get_group(2), item.get_group(1),

View File

@ -45,7 +45,7 @@ struct BinOp1D {
// For integral types the number of bins may be larger than the range // For integral types the number of bins may be larger than the range
// in which case we can exactly have one unique value per bin // in which case we can exactly have one unique value per bin
// and then don't need to sort bins. // and then don't need to sort bins.
if (std::is_integral<typename KeyViewType::const_value_type>::value && if (std::is_integral_v<typename KeyViewType::const_value_type> &&
(static_cast<double>(max) - static_cast<double>(min)) <= (static_cast<double>(max) - static_cast<double>(min)) <=
static_cast<double>(max_bins)) { static_cast<double>(max_bins)) {
mul_ = 1.; mul_ = 1.;

View File

@ -53,13 +53,9 @@ void sort(const ExecutionSpace& exec,
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) { if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
exec.fence("Kokkos::sort without comparator use std::sort"); exec.fence("Kokkos::sort without comparator use std::sort");
if (view.span_is_contiguous()) {
std::sort(view.data(), view.data() + view.size());
} else {
auto first = ::Kokkos::Experimental::begin(view); auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view); auto last = ::Kokkos::Experimental::end(view);
std::sort(first, last); std::sort(first, last);
}
} else { } else {
Impl::sort_device_view_without_comparator(exec, view); Impl::sort_device_view_without_comparator(exec, view);
} }
@ -111,13 +107,9 @@ void sort(const ExecutionSpace& exec,
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) { if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
exec.fence("Kokkos::sort with comparator use std::sort"); exec.fence("Kokkos::sort with comparator use std::sort");
if (view.span_is_contiguous()) {
std::sort(view.data(), view.data() + view.size(), comparator);
} else {
auto first = ::Kokkos::Experimental::begin(view); auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view); auto last = ::Kokkos::Experimental::end(view);
std::sort(first, last, comparator); std::sort(first, last, comparator);
}
} else { } else {
Impl::sort_device_view_with_comparator(exec, view, comparator); Impl::sort_device_view_with_comparator(exec, view, comparator);
} }

View File

@ -47,6 +47,7 @@
#ifdef _CubLog #ifdef _CubLog
#undef _CubLog #undef _CubLog
#endif #endif
// NOLINTNEXTLINE(bugprone-reserved-identifier)
#define _CubLog #define _CubLog
#include <thrust/device_ptr.h> #include <thrust/device_ptr.h>
#include <thrust/sort.h> #include <thrust/sort.h>
@ -65,12 +66,24 @@
#include <thrust/sort.h> #include <thrust/sort.h>
#endif #endif
#if defined(KOKKOS_ENABLE_ONEDPL) && \ #ifdef KOKKOS_ENABLE_ONEDPL
(ONEDPL_VERSION_MAJOR > 2022 || \ #define KOKKOS_IMPL_ONEDPL_VERSION \
(ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2)) ONEDPL_VERSION_MAJOR * 10000 + ONEDPL_VERSION_MINOR * 100 + \
#define KOKKOS_ONEDPL_HAS_SORT_BY_KEY ONEDPL_VERSION_PATCH
#define KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(MAJOR, MINOR, PATCH) \
(KOKKOS_IMPL_ONEDPL_VERSION >= ((MAJOR)*10000 + (MINOR)*100 + (PATCH)))
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 2, 0)
#define KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wunused-variable"
#include <oneapi/dpl/execution> #include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm> #include <oneapi/dpl/algorithm>
#pragma GCC diagnostic pop
#endif
#endif #endif
namespace Kokkos::Impl { namespace Kokkos::Impl {
@ -141,12 +154,18 @@ void sort_by_key_rocthrust(
#endif #endif
#if defined(KOKKOS_ENABLE_ONEDPL) #if defined(KOKKOS_ENABLE_ONEDPL)
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
template <class Layout>
inline constexpr bool sort_on_device_v<Kokkos::SYCL, Layout> = true;
#else
template <class Layout> template <class Layout>
inline constexpr bool sort_on_device_v<Kokkos::SYCL, Layout> = inline constexpr bool sort_on_device_v<Kokkos::SYCL, Layout> =
std::is_same_v<Layout, Kokkos::LayoutLeft> || std::is_same_v<Layout, Kokkos::LayoutLeft> ||
std::is_same_v<Layout, Kokkos::LayoutRight>; std::is_same_v<Layout, Kokkos::LayoutRight>;
#endif
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY #ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
template <class KeysDataType, class... KeysProperties, class ValuesDataType, template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties, class... MaybeComparator> class... ValuesProperties, class... MaybeComparator>
void sort_by_key_onedpl( void sort_by_key_onedpl(
@ -154,6 +173,14 @@ void sort_by_key_onedpl(
const Kokkos::View<KeysDataType, KeysProperties...>& keys, const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values, const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
MaybeComparator&&... maybeComparator) { MaybeComparator&&... maybeComparator) {
auto queue = exec.sycl_queue();
auto policy = oneapi::dpl::execution::make_device_policy(queue);
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
oneapi::dpl::sort_by_key(policy, ::Kokkos::Experimental::begin(keys),
::Kokkos::Experimental::end(keys),
::Kokkos::Experimental::begin(values),
std::forward<MaybeComparator>(maybeComparator)...);
#else
if (keys.stride(0) != 1 && values.stride(0) != 1) { if (keys.stride(0) != 1 && values.stride(0) != 1) {
Kokkos::abort( Kokkos::abort(
"SYCL sort_by_key only supports rank-1 Views with stride(0) = 1."); "SYCL sort_by_key only supports rank-1 Views with stride(0) = 1.");
@ -161,11 +188,10 @@ void sort_by_key_onedpl(
// Can't use Experimental::begin/end here since the oneDPL then assumes that // Can't use Experimental::begin/end here since the oneDPL then assumes that
// the data is on the host. // the data is on the host.
auto queue = exec.sycl_queue();
auto policy = oneapi::dpl::execution::make_device_policy(queue);
const int n = keys.extent(0); const int n = keys.extent(0);
oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(), oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(),
std::forward<MaybeComparator>(maybeComparator)...); std::forward<MaybeComparator>(maybeComparator)...);
#endif
} }
#endif #endif
#endif #endif
@ -336,12 +362,18 @@ void sort_by_key_device_view_without_comparator(
const Kokkos::SYCL& exec, const Kokkos::SYCL& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys, const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) { const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY #ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
sort_by_key_onedpl(exec, keys, values);
#else
if (keys.stride(0) == 1 && values.stride(0) == 1) if (keys.stride(0) == 1 && values.stride(0) == 1)
sort_by_key_onedpl(exec, keys, values); sort_by_key_onedpl(exec, keys, values);
else else
#endif
sort_by_key_via_sort(exec, keys, values); sort_by_key_via_sort(exec, keys, values);
#endif
#else
sort_by_key_via_sort(exec, keys, values);
#endif
} }
#endif #endif
@ -394,12 +426,18 @@ void sort_by_key_device_view_with_comparator(
const Kokkos::View<KeysDataType, KeysProperties...>& keys, const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values, const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
const ComparatorType& comparator) { const ComparatorType& comparator) {
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY #ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
sort_by_key_onedpl(exec, keys, values, comparator);
#else
if (keys.stride(0) == 1 && values.stride(0) == 1) if (keys.stride(0) == 1 && values.stride(0) == 1)
sort_by_key_onedpl(exec, keys, values, comparator); sort_by_key_onedpl(exec, keys, values, comparator);
else else
#endif
sort_by_key_via_sort(exec, keys, values, comparator); sort_by_key_via_sort(exec, keys, values, comparator);
#endif
#else
sort_by_key_via_sort(exec, keys, values, comparator);
#endif
} }
#endif #endif
@ -416,7 +454,9 @@ sort_by_key_device_view_with_comparator(
sort_by_key_via_sort(exec, keys, values, comparator); sort_by_key_via_sort(exec, keys, values, comparator);
} }
#undef KOKKOS_ONEDPL_HAS_SORT_BY_KEY #undef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
} // namespace Kokkos::Impl } // namespace Kokkos::Impl
#undef KOKKOS_IMPL_ONEDPL_VERSION
#undef KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL
#endif #endif

View File

@ -51,6 +51,7 @@
#ifdef _CubLog #ifdef _CubLog
#undef _CubLog #undef _CubLog
#endif #endif
// NOLINTNEXTLINE(bugprone-reserved-identifier)
#define _CubLog #define _CubLog
#include <thrust/device_ptr.h> #include <thrust/device_ptr.h>
#include <thrust/sort.h> #include <thrust/sort.h>
@ -70,8 +71,20 @@
#endif #endif
#if defined(KOKKOS_ENABLE_ONEDPL) #if defined(KOKKOS_ENABLE_ONEDPL)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wunused-variable"
#include <oneapi/dpl/execution> #include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm> #include <oneapi/dpl/algorithm>
#pragma GCC diagnostic pop
#define KOKKOS_IMPL_ONEDPL_VERSION \
ONEDPL_VERSION_MAJOR * 10000 + ONEDPL_VERSION_MINOR * 100 + \
ONEDPL_VERSION_PATCH
#define KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(MAJOR, MINOR, PATCH) \
(KOKKOS_IMPL_ONEDPL_VERSION >= ((MAJOR)*10000 + (MINOR)*100 + (PATCH)))
#endif #endif
namespace Kokkos { namespace Kokkos {
@ -221,6 +234,10 @@ void sort_onedpl(const Kokkos::SYCL& space,
"SYCL execution space is not able to access the memory space " "SYCL execution space is not able to access the memory space "
"of the View argument!"); "of the View argument!");
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
static_assert(ViewType::rank == 1,
"Kokkos::sort currently only supports rank-1 Views.");
#else
static_assert( static_assert(
(ViewType::rank == 1) && (ViewType::rank == 1) &&
(std::is_same_v<typename ViewType::array_layout, LayoutRight> || (std::is_same_v<typename ViewType::array_layout, LayoutRight> ||
@ -234,18 +251,26 @@ void sort_onedpl(const Kokkos::SYCL& space,
if (view.stride(0) != 1) { if (view.stride(0) != 1) {
Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1."); Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1.");
} }
#endif
if (view.extent(0) <= 1) { if (view.extent(0) <= 1) {
return; return;
} }
// Can't use Experimental::begin/end here since the oneDPL then assumes that
// the data is on the host.
auto queue = space.sycl_queue(); auto queue = space.sycl_queue();
auto policy = oneapi::dpl::execution::make_device_policy(queue); auto policy = oneapi::dpl::execution::make_device_policy(queue);
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
oneapi::dpl::sort(policy, ::Kokkos::Experimental::begin(view),
::Kokkos::Experimental::end(view),
std::forward<MaybeComparator>(maybeComparator)...);
#else
// Can't use Experimental::begin/end here since the oneDPL then assumes that
// the data is on the host.
const int n = view.extent(0); const int n = view.extent(0);
oneapi::dpl::sort(policy, view.data(), view.data() + n, oneapi::dpl::sort(policy, view.data(), view.data() + n,
std::forward<MaybeComparator>(maybeComparator)...); std::forward<MaybeComparator>(maybeComparator)...);
#endif
} }
#endif #endif
@ -270,28 +295,18 @@ void copy_to_host_run_stdsort_copy_back(
// run sort on the mirror of view_dc // run sort on the mirror of view_dc
auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc);
if (view.span_is_contiguous()) {
std::sort(mv_h.data(), mv_h.data() + mv_h.size(),
std::forward<MaybeComparator>(maybeComparator)...);
} else {
auto first = KE::begin(mv_h); auto first = KE::begin(mv_h);
auto last = KE::end(mv_h); auto last = KE::end(mv_h);
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...); std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
}
Kokkos::deep_copy(exec, view_dc, mv_h); Kokkos::deep_copy(exec, view_dc, mv_h);
// copy back to argument view // copy back to argument view
KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view)); KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view));
} else { } else {
auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view); auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view);
if (view.span_is_contiguous()) {
std::sort(view_h.data(), view_h.data() + view_h.size(),
std::forward<MaybeComparator>(maybeComparator)...);
} else {
auto first = KE::begin(view_h); auto first = KE::begin(view_h);
auto last = KE::end(view_h); auto last = KE::end(view_h);
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...); std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
}
Kokkos::deep_copy(exec, view, view_h); Kokkos::deep_copy(exec, view, view_h);
} }
} }
@ -332,11 +347,15 @@ void sort_device_view_without_comparator(
"sort_device_view_without_comparator: supports rank-1 Views " "sort_device_view_without_comparator: supports rank-1 Views "
"with LayoutLeft, LayoutRight or LayoutStride"); "with LayoutLeft, LayoutRight or LayoutStride");
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
sort_onedpl(exec, view);
#else
if (view.stride(0) == 1) { if (view.stride(0) == 1) {
sort_onedpl(exec, view); sort_onedpl(exec, view);
} else { } else {
copy_to_host_run_stdsort_copy_back(exec, view); copy_to_host_run_stdsort_copy_back(exec, view);
} }
#endif
} }
#endif #endif
@ -387,11 +406,15 @@ void sort_device_view_with_comparator(
"sort_device_view_with_comparator: supports rank-1 Views " "sort_device_view_with_comparator: supports rank-1 Views "
"with LayoutLeft, LayoutRight or LayoutStride"); "with LayoutLeft, LayoutRight or LayoutStride");
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
sort_onedpl(exec, view, comparator);
#else
if (view.stride(0) == 1) { if (view.stride(0) == 1) {
sort_onedpl(exec, view, comparator); sort_onedpl(exec, view, comparator);
} else { } else {
copy_to_host_run_stdsort_copy_back(exec, view, comparator); copy_to_host_run_stdsort_copy_back(exec, view, comparator);
} }
#endif
} }
#endif #endif
@ -423,4 +446,7 @@ sort_device_view_with_comparator(
} // namespace Impl } // namespace Impl
} // namespace Kokkos } // namespace Kokkos
#undef KOKKOS_IMPL_ONEDPL_VERSION
#undef KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL
#endif #endif

View File

@ -238,12 +238,9 @@ KOKKOS_INLINE_FUNCTION void expect_no_overlap(
[[maybe_unused]] IteratorType2 s_first) { [[maybe_unused]] IteratorType2 s_first) {
if constexpr (is_kokkos_iterator_v<IteratorType1> && if constexpr (is_kokkos_iterator_v<IteratorType1> &&
is_kokkos_iterator_v<IteratorType2>) { is_kokkos_iterator_v<IteratorType2>) {
auto const view1 = first.view(); std::size_t stride1 = first.stride();
auto const view2 = s_first.view(); std::size_t stride2 = s_first.stride();
ptrdiff_t first_diff = first.data() - s_first.data();
std::size_t stride1 = view1.stride(0);
std::size_t stride2 = view2.stride(0);
ptrdiff_t first_diff = view1.data() - view2.data();
// FIXME If strides are not identical, checks may not be made // FIXME If strides are not identical, checks may not be made
// with the cost of O(1) // with the cost of O(1)
@ -251,8 +248,8 @@ KOKKOS_INLINE_FUNCTION void expect_no_overlap(
// If first_diff == 0, there is already an overlap // If first_diff == 0, there is already an overlap
if (stride1 == stride2 || first_diff == 0) { if (stride1 == stride2 || first_diff == 0) {
[[maybe_unused]] bool is_no_overlap = (first_diff % stride1); [[maybe_unused]] bool is_no_overlap = (first_diff % stride1);
auto* first_pointer1 = view1.data(); auto* first_pointer1 = first.data();
auto* first_pointer2 = view2.data(); auto* first_pointer2 = s_first.data();
[[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first); [[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first);
[[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first); [[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first);
KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 || KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 ||

View File

@ -150,9 +150,8 @@ KOKKOS_FUNCTION OutputIterator copy_if_team_impl(
return d_first + count; return d_first + count;
} }
#if defined KOKKOS_COMPILER_INTEL || \ #if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ !defined(KOKKOS_COMPILER_MSVC)
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable(); __builtin_unreachable();
#endif #endif
} }

View File

@ -103,7 +103,7 @@ OutputIteratorType exclusive_scan_custom_op_exespace_impl(
// aliases // aliases
using index_type = typename InputIteratorType::difference_type; using index_type = typename InputIteratorType::difference_type;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
using func_type = TransformExclusiveScanFunctorWithValueWrapper< using func_type = TransformExclusiveScanFunctorWithValueWrapper<
ExecutionSpace, index_type, ValueType, InputIteratorType, ExecutionSpace, index_type, ValueType, InputIteratorType,
OutputIteratorType, BinaryOpType, unary_op_type>; OutputIteratorType, BinaryOpType, unary_op_type>;
@ -177,7 +177,7 @@ KOKKOS_FUNCTION OutputIteratorType exclusive_scan_custom_op_team_impl(
// aliases // aliases
using exe_space = typename TeamHandleType::execution_space; using exe_space = typename TeamHandleType::execution_space;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
using index_type = typename InputIteratorType::difference_type; using index_type = typename InputIteratorType::difference_type;
using func_type = TransformExclusiveScanFunctorWithoutValueWrapper< using func_type = TransformExclusiveScanFunctorWithoutValueWrapper<
exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType, exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType,

View File

@ -23,10 +23,11 @@ namespace Kokkos {
namespace Experimental { namespace Experimental {
namespace Impl { namespace Impl {
template <class ValueType>
struct StdNumericScanIdentityReferenceUnaryFunctor { struct StdNumericScanIdentityReferenceUnaryFunctor {
KOKKOS_FUNCTION template <class T>
constexpr const ValueType& operator()(const ValueType& a) const { return a; } KOKKOS_FUNCTION constexpr T&& operator()(T&& t) const {
return static_cast<T&&>(t);
}
}; };
} // namespace Impl } // namespace Impl

View File

@ -18,12 +18,60 @@
#define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP #define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP
#include <Kokkos_Core.hpp> #include <Kokkos_Core.hpp>
#include <Kokkos_Profiling_ScopedRegion.hpp>
#include "Kokkos_Constraints.hpp" #include "Kokkos_Constraints.hpp"
#include "Kokkos_HelperPredicates.hpp" #include "Kokkos_HelperPredicates.hpp"
#include <std_algorithms/Kokkos_TransformInclusiveScan.hpp> #include <std_algorithms/Kokkos_TransformInclusiveScan.hpp>
#include <std_algorithms/Kokkos_Distance.hpp> #include <std_algorithms/Kokkos_Distance.hpp>
#include <string> #include <string>
#if defined(KOKKOS_ENABLE_CUDA)
// Workaround for `Instruction 'shfl' without '.sync' is not supported on
// .target sm_70 and higher from PTX ISA version 6.4`.
// Also see https://github.com/NVIDIA/cub/pull/170.
#if !defined(CUB_USE_COOPERATIVE_GROUPS)
#define CUB_USE_COOPERATIVE_GROUPS
#endif
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wsuggest-override"
#if defined(KOKKOS_COMPILER_CLANG)
// Some versions of Clang fail to compile Thrust, failing with errors like
// this:
// <snip>/thrust/system/cuda/detail/core/agent_launcher.h:557:11:
// error: use of undeclared identifier 'va_printf'
// The exact combination of versions for Clang and Thrust (or CUDA) for this
// failure was not investigated, however even very recent version combination
// (Clang 10.0.0 and Cuda 10.0) demonstrated failure.
//
// Defining _CubLog here locally allows us to avoid that code path, however
// disabling some debugging diagnostics
#pragma push_macro("_CubLog")
#ifdef _CubLog
#undef _CubLog
#endif
// NOLINTNEXTLINE(bugprone-reserved-identifier)
#define _CubLog
#include <thrust/distance.h>
#include <thrust/scan.h>
#pragma pop_macro("_CubLog")
#else
#include <thrust/distance.h>
#include <thrust/scan.h>
#endif
#pragma GCC diagnostic pop
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
#include <thrust/distance.h>
#include <thrust/scan.h>
#endif
namespace Kokkos { namespace Kokkos {
namespace Experimental { namespace Experimental {
namespace Impl { namespace Impl {
@ -101,9 +149,48 @@ struct InclusiveScanDefaultFunctor {
} }
}; };
// // -------------------------------------------------------------
// exespace impl // inclusive_scan_default_op_exespace_impl
// // -------------------------------------------------------------
#if defined(KOKKOS_ENABLE_CUDA)
template <class InputIteratorType, class OutputIteratorType>
OutputIteratorType inclusive_scan_default_op_exespace_impl(
const std::string& label, const Cuda& ex, InputIteratorType first_from,
InputIteratorType last_from, OutputIteratorType first_dest) {
const auto thrust_ex = thrust::cuda::par.on(ex.cuda_stream());
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest);
Kokkos::Profiling::popRegion();
const auto num_elements = thrust::distance(first_from, last_from);
return first_dest + num_elements;
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class InputIteratorType, class OutputIteratorType>
OutputIteratorType inclusive_scan_default_op_exespace_impl(
const std::string& label, const HIP& ex, InputIteratorType first_from,
InputIteratorType last_from, OutputIteratorType first_dest) {
const auto thrust_ex = thrust::hip::par.on(ex.hip_stream());
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest);
Kokkos::Profiling::popRegion();
const auto num_elements = thrust::distance(first_from, last_from);
return first_dest + num_elements;
}
#endif
template <class ExecutionSpace, class InputIteratorType, template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType> class OutputIteratorType>
OutputIteratorType inclusive_scan_default_op_exespace_impl( OutputIteratorType inclusive_scan_default_op_exespace_impl(
@ -132,11 +219,16 @@ OutputIteratorType inclusive_scan_default_op_exespace_impl(
// run // run
const auto num_elements = const auto num_elements =
Kokkos::Experimental::distance(first_from, last_from); Kokkos::Experimental::distance(first_from, last_from);
Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan");
::Kokkos::parallel_scan(label, ::Kokkos::parallel_scan(label,
RangePolicy<ExecutionSpace>(ex, 0, num_elements), RangePolicy<ExecutionSpace>(ex, 0, num_elements),
func_type(first_from, first_dest)); func_type(first_from, first_dest));
ex.fence("Kokkos::inclusive_scan_default_op: fence after operation"); ex.fence("Kokkos::inclusive_scan_default_op: fence after operation");
Kokkos::Profiling::popRegion();
// return // return
return first_dest + num_elements; return first_dest + num_elements;
} }
@ -144,6 +236,49 @@ OutputIteratorType inclusive_scan_default_op_exespace_impl(
// ------------------------------------------------------------- // -------------------------------------------------------------
// inclusive_scan_custom_binary_op_impl // inclusive_scan_custom_binary_op_impl
// ------------------------------------------------------------- // -------------------------------------------------------------
#if defined(KOKKOS_ENABLE_CUDA)
template <class InputIteratorType, class OutputIteratorType, class BinaryOpType>
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
const std::string& label, const Cuda& ex, InputIteratorType first_from,
InputIteratorType last_from, OutputIteratorType first_dest,
BinaryOpType binary_op) {
const auto thrust_ex = thrust::cuda::par.on(ex.cuda_stream());
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest,
binary_op);
Kokkos::Profiling::popRegion();
const auto num_elements = thrust::distance(first_from, last_from);
return first_dest + num_elements;
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class InputIteratorType, class OutputIteratorType, class BinaryOpType>
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
const std::string& label, const HIP& ex, InputIteratorType first_from,
InputIteratorType last_from, OutputIteratorType first_dest,
BinaryOpType binary_op) {
const auto thrust_ex = thrust::hip::par.on(ex.hip_stream());
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest,
binary_op);
Kokkos::Profiling::popRegion();
const auto num_elements = thrust::distance(first_from, last_from);
return first_dest + num_elements;
}
#endif
template <class ExecutionSpace, class InputIteratorType, template <class ExecutionSpace, class InputIteratorType,
class OutputIteratorType, class BinaryOpType> class OutputIteratorType, class BinaryOpType>
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
@ -160,7 +295,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
using index_type = typename InputIteratorType::difference_type; using index_type = typename InputIteratorType::difference_type;
using value_type = using value_type =
std::remove_const_t<typename InputIteratorType::value_type>; std::remove_const_t<typename InputIteratorType::value_type>;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<value_type>; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor< using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor<
ExecutionSpace, index_type, value_type, InputIteratorType, ExecutionSpace, index_type, value_type, InputIteratorType,
OutputIteratorType, BinaryOpType, unary_op_type>; OutputIteratorType, BinaryOpType, unary_op_type>;
@ -168,11 +303,16 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
// run // run
const auto num_elements = const auto num_elements =
Kokkos::Experimental::distance(first_from, last_from); Kokkos::Experimental::distance(first_from, last_from);
Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan");
::Kokkos::parallel_scan( ::Kokkos::parallel_scan(
label, RangePolicy<ExecutionSpace>(ex, 0, num_elements), label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
func_type(first_from, first_dest, binary_op, unary_op_type())); func_type(first_from, first_dest, binary_op, unary_op_type()));
ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation");
Kokkos::Profiling::popRegion();
// return // return
return first_dest + num_elements; return first_dest + num_elements;
} }
@ -195,7 +335,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
// aliases // aliases
using index_type = typename InputIteratorType::difference_type; using index_type = typename InputIteratorType::difference_type;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor< using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor<
ExecutionSpace, index_type, ValueType, InputIteratorType, ExecutionSpace, index_type, ValueType, InputIteratorType,
OutputIteratorType, BinaryOpType, unary_op_type>; OutputIteratorType, BinaryOpType, unary_op_type>;
@ -203,12 +343,17 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
// run // run
const auto num_elements = const auto num_elements =
Kokkos::Experimental::distance(first_from, last_from); Kokkos::Experimental::distance(first_from, last_from);
Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan");
::Kokkos::parallel_scan(label, ::Kokkos::parallel_scan(label,
RangePolicy<ExecutionSpace>(ex, 0, num_elements), RangePolicy<ExecutionSpace>(ex, 0, num_elements),
func_type(first_from, first_dest, binary_op, func_type(first_from, first_dest, binary_op,
unary_op_type(), std::move(init_value))); unary_op_type(), std::move(init_value)));
ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation");
Kokkos::Profiling::popRegion();
// return // return
return first_dest + num_elements; return first_dest + num_elements;
} }
@ -283,7 +428,7 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl(
// aliases // aliases
using exe_space = typename TeamHandleType::execution_space; using exe_space = typename TeamHandleType::execution_space;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<value_type>; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
using func_type = TeamTransformInclusiveScanNoInitValueFunctor< using func_type = TeamTransformInclusiveScanNoInitValueFunctor<
exe_space, value_type, InputIteratorType, OutputIteratorType, exe_space, value_type, InputIteratorType, OutputIteratorType,
BinaryOpType, unary_op_type>; BinaryOpType, unary_op_type>;
@ -291,7 +436,6 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl(
// run // run
const auto num_elements = const auto num_elements =
Kokkos::Experimental::distance(first_from, last_from); Kokkos::Experimental::distance(first_from, last_from);
::Kokkos::parallel_scan( ::Kokkos::parallel_scan(
TeamThreadRange(teamHandle, 0, num_elements), TeamThreadRange(teamHandle, 0, num_elements),
func_type(first_from, first_dest, binary_op, unary_op_type())); func_type(first_from, first_dest, binary_op, unary_op_type()));
@ -325,7 +469,7 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl(
// aliases // aliases
using exe_space = typename TeamHandleType::execution_space; using exe_space = typename TeamHandleType::execution_space;
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>; using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
using func_type = TeamTransformInclusiveScanWithInitValueFunctor< using func_type = TeamTransformInclusiveScanWithInitValueFunctor<
exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType,
unary_op_type>; unary_op_type>;

View File

@ -18,6 +18,7 @@
#define KOKKOS_RANDOM_ACCESS_ITERATOR_IMPL_HPP #define KOKKOS_RANDOM_ACCESS_ITERATOR_IMPL_HPP
#include <iterator> #include <iterator>
#include <utility> // declval
#include <Kokkos_Macros.hpp> #include <Kokkos_Macros.hpp>
#include <Kokkos_View.hpp> #include <Kokkos_View.hpp>
#include "Kokkos_Constraints.hpp" #include "Kokkos_Constraints.hpp"
@ -29,6 +30,27 @@ namespace Impl {
template <class T> template <class T>
class RandomAccessIterator; class RandomAccessIterator;
namespace {
template <typename ViewType>
struct is_always_strided {
static_assert(is_view_v<ViewType>);
constexpr static bool value =
#ifdef KOKKOS_ENABLE_IMPL_MDSPAN
decltype(std::declval<ViewType>().to_mdspan())::is_always_strided();
#else
(std::is_same_v<typename ViewType::traits::array_layout,
Kokkos::LayoutLeft> ||
std::is_same_v<typename ViewType::traits::array_layout,
Kokkos::LayoutRight> ||
std::is_same_v<typename ViewType::traits::array_layout,
Kokkos::LayoutStride>);
#endif
};
} // namespace
template <class DataType, class... Args> template <class DataType, class... Args>
class RandomAccessIterator<::Kokkos::View<DataType, Args...>> { class RandomAccessIterator<::Kokkos::View<DataType, Args...>> {
public: public:
@ -41,30 +63,31 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
using pointer = typename view_type::pointer_type; using pointer = typename view_type::pointer_type;
using reference = typename view_type::reference_type; using reference = typename view_type::reference_type;
// oneDPL needs this alias in order not to assume the data is on the host but on
// the device, see
// https://github.com/uxlfoundation/oneDPL/blob/a045eac689f9107f50ba7b42235e9e927118e483/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h#L210-L214
#ifdef KOKKOS_ENABLE_ONEDPL
using is_passed_directly = std::true_type;
#endif
static_assert(view_type::rank == 1 && static_assert(view_type::rank == 1 &&
(std::is_same_v<typename view_type::traits::array_layout, is_always_strided<::Kokkos::View<DataType, Args...>>::value);
Kokkos::LayoutLeft> ||
std::is_same_v<typename view_type::traits::array_layout,
Kokkos::LayoutRight> ||
std::is_same_v<typename view_type::traits::array_layout,
Kokkos::LayoutStride>),
"RandomAccessIterator only supports 1D Views with LayoutLeft, "
"LayoutRight, LayoutStride.");
KOKKOS_DEFAULTED_FUNCTION RandomAccessIterator() = default; KOKKOS_DEFAULTED_FUNCTION RandomAccessIterator() = default;
explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view) explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view)
: m_view(view) {} : m_data(view.data()), m_stride(view.stride_0()) {}
explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view, explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view,
ptrdiff_t current_index) ptrdiff_t current_index)
: m_view(view), m_current_index(current_index) {} : m_data(view.data() + current_index * view.stride_0()),
m_stride(view.stride_0()) {}
#ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond #ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond
template <class OtherViewType> template <class OtherViewType>
requires(std::is_constructible_v<view_type, OtherViewType>) requires(std::is_constructible_v<view_type, OtherViewType>)
KOKKOS_FUNCTION explicit(!std::is_convertible_v<OtherViewType, view_type>) KOKKOS_FUNCTION explicit(!std::is_convertible_v<OtherViewType, view_type>)
RandomAccessIterator(const RandomAccessIterator<OtherViewType>& other) RandomAccessIterator(const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {} : m_data(other.m_data), m_stride(other.m_stride) {}
#else #else
template < template <
class OtherViewType, class OtherViewType,
@ -73,19 +96,22 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
int> = 0> int> = 0>
KOKKOS_FUNCTION explicit RandomAccessIterator( KOKKOS_FUNCTION explicit RandomAccessIterator(
const RandomAccessIterator<OtherViewType>& other) const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {} : m_data(other.m_data), m_stride(other.m_stride) {}
template <class OtherViewType, template <class OtherViewType,
std::enable_if_t<std::is_convertible_v<OtherViewType, view_type>, std::enable_if_t<std::is_convertible_v<OtherViewType, view_type>,
int> = 0> int> = 0>
KOKKOS_FUNCTION RandomAccessIterator( KOKKOS_FUNCTION RandomAccessIterator(
const RandomAccessIterator<OtherViewType>& other) const RandomAccessIterator<OtherViewType>& other)
: m_view(other.m_view), m_current_index(other.m_current_index) {} : m_data(other.m_data), m_stride(other.m_stride) {}
#endif #endif
KOKKOS_FUNCTION KOKKOS_FUNCTION
iterator_type& operator++() { iterator_type& operator++() {
++m_current_index; if constexpr (is_always_contiguous)
m_data++;
else
m_data += m_stride;
return *this; return *this;
} }
@ -98,7 +124,10 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
KOKKOS_FUNCTION KOKKOS_FUNCTION
iterator_type& operator--() { iterator_type& operator--() {
--m_current_index; if constexpr (is_always_contiguous)
m_data--;
else
m_data -= m_stride;
return *this; return *this;
} }
@ -111,77 +140,95 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
KOKKOS_FUNCTION KOKKOS_FUNCTION
reference operator[](difference_type n) const { reference operator[](difference_type n) const {
return m_view(m_current_index + n); if constexpr (is_always_contiguous)
return *(m_data + n);
else
return *(m_data + n * m_stride);
} }
KOKKOS_FUNCTION KOKKOS_FUNCTION
iterator_type& operator+=(difference_type n) { iterator_type& operator+=(difference_type n) {
m_current_index += n; if constexpr (is_always_contiguous)
m_data += n;
else
m_data += n * m_stride;
return *this; return *this;
} }
KOKKOS_FUNCTION KOKKOS_FUNCTION
iterator_type& operator-=(difference_type n) { iterator_type& operator-=(difference_type n) {
m_current_index -= n; if constexpr (is_always_contiguous)
m_data -= n;
else
m_data -= n * m_stride;
return *this; return *this;
} }
KOKKOS_FUNCTION KOKKOS_FUNCTION
iterator_type operator+(difference_type n) const { iterator_type operator+(difference_type n) const {
return iterator_type(m_view, m_current_index + n); auto it = *this;
it += n;
return it;
}
friend iterator_type operator+(difference_type n, iterator_type other) {
return other + n;
} }
KOKKOS_FUNCTION KOKKOS_FUNCTION
iterator_type operator-(difference_type n) const { iterator_type operator-(difference_type n) const {
return iterator_type(m_view, m_current_index - n); auto it = *this;
it -= n;
return it;
} }
KOKKOS_FUNCTION KOKKOS_FUNCTION
difference_type operator-(iterator_type it) const { difference_type operator-(iterator_type it) const {
return m_current_index - it.m_current_index; if constexpr (is_always_contiguous)
return m_data - it.m_data;
else
return (m_data - it.m_data) / m_stride;
} }
KOKKOS_FUNCTION KOKKOS_FUNCTION
bool operator==(iterator_type other) const { bool operator==(iterator_type other) const {
return m_current_index == other.m_current_index && return m_data == other.m_data && m_stride == other.m_stride;
m_view.data() == other.m_view.data();
} }
KOKKOS_FUNCTION KOKKOS_FUNCTION
bool operator!=(iterator_type other) const { bool operator!=(iterator_type other) const {
return m_current_index != other.m_current_index || return m_data != other.m_data || m_stride != other.m_stride;
m_view.data() != other.m_view.data();
} }
KOKKOS_FUNCTION KOKKOS_FUNCTION
bool operator<(iterator_type other) const { bool operator<(iterator_type other) const { return m_data < other.m_data; }
return m_current_index < other.m_current_index;
}
KOKKOS_FUNCTION KOKKOS_FUNCTION
bool operator<=(iterator_type other) const { bool operator<=(iterator_type other) const { return m_data <= other.m_data; }
return m_current_index <= other.m_current_index;
}
KOKKOS_FUNCTION KOKKOS_FUNCTION
bool operator>(iterator_type other) const { bool operator>(iterator_type other) const { return m_data > other.m_data; }
return m_current_index > other.m_current_index;
}
KOKKOS_FUNCTION KOKKOS_FUNCTION
bool operator>=(iterator_type other) const { bool operator>=(iterator_type other) const { return m_data >= other.m_data; }
return m_current_index >= other.m_current_index;
}
KOKKOS_FUNCTION KOKKOS_FUNCTION
reference operator*() const { return m_view(m_current_index); } reference operator*() const { return *m_data; }
KOKKOS_FUNCTION KOKKOS_FUNCTION
view_type view() const { return m_view; } pointer data() const { return m_data; }
KOKKOS_FUNCTION
int stride() const { return m_stride; }
private: private:
view_type m_view; pointer m_data;
ptrdiff_t m_current_index = 0; int m_stride;
static constexpr bool is_always_contiguous =
(std::is_same_v<typename view_type::traits::array_layout,
Kokkos::LayoutLeft> ||
std::is_same_v<typename view_type::traits::array_layout,
Kokkos::LayoutRight>);
// Needed for the converting constructor accepting another iterator // Needed for the converting constructor accepting another iterator
template <class> template <class>
@ -192,4 +239,10 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
} // namespace Experimental } // namespace Experimental
} // namespace Kokkos } // namespace Kokkos
#ifdef KOKKOS_ENABLE_SYCL
template <class T>
struct sycl::is_device_copyable<
Kokkos::Experimental::Impl::RandomAccessIterator<T>> : std::true_type {};
#endif
#endif #endif

View File

@ -52,13 +52,10 @@ struct StdUniqueFunctor {
auto& val_i = m_first_from[i]; auto& val_i = m_first_from[i];
const auto& val_ip1 = m_first_from[i + 1]; const auto& val_ip1 = m_first_from[i + 1];
if (final_pass) {
if (!m_pred(val_i, val_ip1)) { if (!m_pred(val_i, val_ip1)) {
if (final_pass) {
m_first_dest[update] = std::move(val_i); m_first_dest[update] = std::move(val_i);
} }
}
if (!m_pred(val_i, val_ip1)) {
update += 1; update += 1;
} }
} }
@ -188,6 +185,7 @@ KOKKOS_FUNCTION IteratorType unique_team_impl(const TeamHandleType& teamHandle,
IteratorType result = first; IteratorType result = first;
IteratorType lfirst = first; IteratorType lfirst = first;
while (++lfirst != last) { while (++lfirst != last) {
// NOLINTNEXTLINE(bugprone-inc-dec-in-conditions)
if (!pred(*result, *lfirst) && ++result != lfirst) { if (!pred(*result, *lfirst) && ++result != lfirst) {
*result = std::move(*lfirst); *result = std::move(*lfirst);
} }

View File

@ -175,9 +175,8 @@ KOKKOS_FUNCTION OutputIterator unique_copy_team_impl(
d_first + count); d_first + count);
} }
#if defined KOKKOS_COMPILER_INTEL || \ #if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ !defined(KOKKOS_COMPILER_MSVC)
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable(); __builtin_unreachable();
#endif #endif
} }

View File

@ -18,6 +18,8 @@ LINK ?= $(CXX)
LDFLAGS ?= LDFLAGS ?=
override LDFLAGS += -lpthread override LDFLAGS += -lpthread
KOKKOS_USE_DEPRECATED_MAKEFILES=1
include $(KOKKOS_PATH)/Makefile.kokkos include $(KOKKOS_PATH)/Makefile.kokkos
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests -I${KOKKOS_PATH}/core/unit_test/category_files KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests -I${KOKKOS_PATH}/core/unit_test/category_files

View File

@ -281,7 +281,7 @@ struct test_random_scalar {
double covariance_eps = double covariance_eps =
result.covariance / num_draws / 2 / variance_expect; result.covariance / num_draws / 2 / variance_expect;
#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT #if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
if (!std::is_same<Scalar, Kokkos::Experimental::bhalf_t>::value) { if (!std::is_same_v<Scalar, Kokkos::Experimental::bhalf_t>) {
#endif #endif
EXPECT_LT(std::abs(mean_eps), tolerance); EXPECT_LT(std::abs(mean_eps), tolerance);
EXPECT_LT(std::abs(variance_eps), 1.5 * tolerance); EXPECT_LT(std::abs(variance_eps), 1.5 * tolerance);
@ -312,7 +312,7 @@ struct test_random_scalar {
(result.covariance / HIST_DIM1D - covariance_expect) / mean_expect; (result.covariance / HIST_DIM1D - covariance_expect) / mean_expect;
#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT #if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
if (std::is_same<Scalar, Kokkos::Experimental::half_t>::value) { if (std::is_same_v<Scalar, Kokkos::Experimental::half_t>) {
mean_eps_expect = 0.0003; mean_eps_expect = 0.0003;
variance_eps_expect = 1.0; variance_eps_expect = 1.0;
covariance_eps_expect = 5.0e4; covariance_eps_expect = 5.0e4;
@ -320,7 +320,7 @@ struct test_random_scalar {
#endif #endif
#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT #if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
if (!std::is_same<Scalar, Kokkos::Experimental::bhalf_t>::value) { if (!std::is_same_v<Scalar, Kokkos::Experimental::bhalf_t>) {
#endif #endif
EXPECT_LT(std::abs(mean_eps), mean_eps_expect); EXPECT_LT(std::abs(mean_eps), mean_eps_expect);
EXPECT_LT(std::abs(variance_eps), variance_eps_expect); EXPECT_LT(std::abs(variance_eps), variance_eps_expect);
@ -358,13 +358,13 @@ struct test_random_scalar {
(result.covariance / HIST_DIM1D - covariance_expect) / mean_expect; (result.covariance / HIST_DIM1D - covariance_expect) / mean_expect;
#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT #if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
if (std::is_same<Scalar, Kokkos::Experimental::half_t>::value) { if (std::is_same_v<Scalar, Kokkos::Experimental::half_t>) {
variance_factor = 7; variance_factor = 7;
} }
#endif #endif
#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT #if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
if (!std::is_same<Scalar, Kokkos::Experimental::bhalf_t>::value) { if (!std::is_same_v<Scalar, Kokkos::Experimental::bhalf_t>) {
#endif #endif
EXPECT_LT(std::abs(mean_eps), tolerance); EXPECT_LT(std::abs(mean_eps), tolerance);
EXPECT_LT(std::abs(variance_eps), variance_factor); EXPECT_LT(std::abs(variance_eps), variance_factor);

View File

@ -37,12 +37,18 @@ struct random_access_iterator_test : std_algorithms_test {
TEST_F(random_access_iterator_test, constructor) { TEST_F(random_access_iterator_test, constructor) {
// just tests that constructor works // just tests that constructor works
auto it1 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view); [[maybe_unused]] auto it1 =
auto it2 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view); KE::Impl::RandomAccessIterator<static_view_t>(m_static_view);
auto it3 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view); [[maybe_unused]] auto it2 =
auto it4 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3); KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view);
auto it5 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 3); [[maybe_unused]] auto it3 =
auto it6 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 3); KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view);
[[maybe_unused]] auto it4 =
KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3);
[[maybe_unused]] auto it5 =
KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 3);
[[maybe_unused]] auto it6 =
KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 3);
EXPECT_TRUE(true); EXPECT_TRUE(true);
} }

View File

@ -99,6 +99,7 @@ void test_dynamic_view_sort_impl(unsigned int n) {
Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>; Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>;
using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>; using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;
// NOLINTNEXTLINE(bugprone-implicit-widening-of-multiplication-result)
const size_t upper_bound = 2 * n; const size_t upper_bound = 2 * n;
const size_t min_chunk_size = 1024; const size_t min_chunk_size = 1024;

View File

@ -198,9 +198,8 @@ auto create_deep_copyable_compatible_view_with_same_extent(ViewType view) {
// this is needed for intel to avoid // this is needed for intel to avoid
// error #1011: missing return statement at end of non-void function // error #1011: missing return statement at end of non-void function
#if defined KOKKOS_COMPILER_INTEL || \ #if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ !defined(KOKKOS_COMPILER_MSVC)
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable(); __builtin_unreachable();
#endif #endif
} }

View File

@ -507,6 +507,20 @@ struct TestStruct {
} }
}; };
#ifndef KOKKOS_ENABLE_CXX17
template <typename ViewType>
constexpr bool
test_kokkos_iterator_satify_std_random_access_iterator_concept() {
return std::random_access_iterator<
Kokkos::Experimental::Impl::RandomAccessIterator<ViewType>>;
}
static_assert(test_kokkos_iterator_satify_std_random_access_iterator_concept<
Kokkos::View<int *>>());
static_assert(test_kokkos_iterator_satify_std_random_access_iterator_concept<
Kokkos::View<const int *>>());
#endif
} // namespace compileonly } // namespace compileonly
} // namespace stdalgos } // namespace stdalgos
} // namespace Test } // namespace Test

View File

@ -173,6 +173,7 @@ TEST(std_algorithms_DeathTest, expect_no_overlap) {
KE::Impl::expect_no_overlap(sub_first_d0, sub_last_d0, sub_first_d1); KE::Impl::expect_no_overlap(sub_first_d0, sub_last_d0, sub_first_d1);
// NOLINTNEXTLINE(bugprone-implicit-widening-of-multiplication-result)
Kokkos::LayoutStride layout2d{2, 3, extent0, 2 * 3}; Kokkos::LayoutStride layout2d{2, 3, extent0, 2 * 3};
Kokkos::View<value_type**, Kokkos::LayoutStride> strided_view_2d{ Kokkos::View<value_type**, Kokkos::LayoutStride> strided_view_2d{
"std-algo-test-2d-contiguous-view-strided", layout2d}; "std-algo-test-2d-contiguous-view-strided", layout2d};

View File

@ -171,7 +171,7 @@ struct VerifyData {
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc); create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
if (test_view_h.extent(0) > 0) { if (test_view_h.extent(0) > 0) {
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) { for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
if (std::is_same<gold_view_value_type, int>::value) { if (std::is_same_v<gold_view_value_type, int>) {
ASSERT_EQ(gold_h(i), test_view_h(i)); ASSERT_EQ(gold_h(i), test_view_h(i));
} else { } else {
const auto error = const auto error =

View File

@ -184,7 +184,7 @@ struct VerifyData {
const auto ext = test_view_h.extent(0); const auto ext = test_view_h.extent(0);
if (ext > 0) { if (ext > 0) {
for (std::size_t i = 0; i < ext; ++i) { for (std::size_t i = 0; i < ext; ++i) {
if (std::is_same<gold_view_value_type, int>::value) { if (std::is_same_v<gold_view_value_type, int>) {
ASSERT_EQ(gold_h(i), test_view_h(i)); ASSERT_EQ(gold_h(i), test_view_h(i));
} else { } else {
const auto error = const auto error =

View File

@ -153,12 +153,13 @@ void run_single_scenario(const InfoType& scenario_info) {
#if !defined KOKKOS_ENABLE_OPENMPTARGET #if !defined KOKKOS_ENABLE_OPENMPTARGET
CustomLessThanComparator<ValueType, ValueType> comp; CustomLessThanComparator<ValueType, ValueType> comp;
auto r5 = [[maybe_unused]] auto r5 =
KE::is_sorted_until(exespace(), KE::cbegin(view), KE::cend(view), comp); KE::is_sorted_until(exespace(), KE::cbegin(view), KE::cend(view), comp);
auto r6 = KE::is_sorted_until("label", exespace(), KE::cbegin(view), [[maybe_unused]] auto r6 = KE::is_sorted_until(
KE::cend(view), comp); "label", exespace(), KE::cbegin(view), KE::cend(view), comp);
auto r7 = KE::is_sorted_until(exespace(), view, comp); [[maybe_unused]] auto r7 = KE::is_sorted_until(exespace(), view, comp);
auto r8 = KE::is_sorted_until("label", exespace(), view, comp); [[maybe_unused]] auto r8 =
KE::is_sorted_until("label", exespace(), view, comp);
#endif #endif
ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{}); ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{});

View File

@ -53,13 +53,13 @@ TEST(std_algorithms_mod_ops_test, move) {
// move constr // move constr
MyMovableType b(std::move(a)); MyMovableType b(std::move(a));
ASSERT_EQ(b.m_value, 11); ASSERT_EQ(b.m_value, 11);
ASSERT_EQ(a.m_value, -2); ASSERT_EQ(a.m_value, -2); // NOLINT(bugprone-use-after-move)
// move assign // move assign
MyMovableType c; MyMovableType c;
c = std::move(b); c = std::move(b);
ASSERT_EQ(c.m_value, 11); ASSERT_EQ(c.m_value, 11);
ASSERT_EQ(b.m_value, -4); ASSERT_EQ(b.m_value, -4); // NOLINT(bugprone-use-after-move)
} }
template <class ViewType> template <class ViewType>
@ -70,7 +70,7 @@ struct StdAlgoModSeqOpsTestMove {
void operator()(const int index) const { void operator()(const int index) const {
typename ViewType::value_type a{11}; typename ViewType::value_type a{11};
using move_t = decltype(std::move(a)); using move_t = decltype(std::move(a));
static_assert(std::is_rvalue_reference<move_t>::value); static_assert(std::is_rvalue_reference_v<move_t>);
m_view(index) = std::move(a); m_view(index) = std::move(a);
} }

View File

@ -243,16 +243,15 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view,
ViewType2 second_view, ViewType2 second_view,
ValueType init_value, ValueType init_value,
ValueType result_value, ValueType result_value,
Args&&... args) { Args const&... args) {
// trivial cases // trivial cases
const auto r1 = KE::transform_reduce( const auto r1 = KE::transform_reduce(
ExecutionSpace(), KE::cbegin(first_view), KE::cbegin(first_view), ExecutionSpace(), KE::cbegin(first_view), KE::cbegin(first_view),
KE::cbegin(second_view), init_value, std::forward<Args>(args)...); KE::cbegin(second_view), init_value, args...);
const auto r2 = const auto r2 = KE::transform_reduce(
KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(first_view), "MYLABEL", ExecutionSpace(), KE::cbegin(first_view),
KE::cbegin(first_view), KE::cbegin(second_view), KE::cbegin(first_view), KE::cbegin(second_view), init_value, args...);
init_value, std::forward<Args>(args)...);
ASSERT_EQ(r1, init_value); ASSERT_EQ(r1, init_value);
ASSERT_EQ(r2, init_value); ASSERT_EQ(r2, init_value);
@ -260,18 +259,16 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view,
// non trivial cases // non trivial cases
const auto r3 = KE::transform_reduce( const auto r3 = KE::transform_reduce(
ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view), ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view),
KE::cbegin(second_view), init_value, std::forward<Args>(args)...); KE::cbegin(second_view), init_value, args...);
const auto r4 = KE::transform_reduce( const auto r4 = KE::transform_reduce(
"MYLABEL", ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view), "MYLABEL", ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view),
KE::cbegin(second_view), init_value, std::forward<Args>(args)...); KE::cbegin(second_view), init_value, args...);
const auto r5 = const auto r5 = KE::transform_reduce(ExecutionSpace(), first_view,
KE::transform_reduce(ExecutionSpace(), first_view, second_view, second_view, init_value, args...);
init_value, std::forward<Args>(args)...); const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view,
const auto r6 = second_view, init_value, args...);
KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, second_view,
init_value, std::forward<Args>(args)...);
ASSERT_EQ(r3, result_value); ASSERT_EQ(r3, result_value);
ASSERT_EQ(r4, result_value); ASSERT_EQ(r4, result_value);
@ -363,32 +360,30 @@ template <class ExecutionSpace, class ViewType, class ValueType, class... Args>
void run_and_check_transform_reduce_overloadB(ViewType view, void run_and_check_transform_reduce_overloadB(ViewType view,
ValueType init_value, ValueType init_value,
ValueType result_value, ValueType result_value,
Args&&... args) { Args const&... args) {
// trivial // trivial
const auto r1 = const auto r1 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(view),
KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), KE::cbegin(view), KE::cbegin(view), init_value, args...);
init_value, std::forward<Args>(args)...);
const auto r2 = KE::transform_reduce("MYLABEL", ExecutionSpace(), const auto r2 =
KE::cbegin(view), KE::cbegin(view), KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view),
init_value, std::forward<Args>(args)...); KE::cbegin(view), init_value, args...);
ASSERT_EQ(r1, init_value); ASSERT_EQ(r1, init_value);
ASSERT_EQ(r2, init_value); ASSERT_EQ(r2, init_value);
// non trivial // non trivial
const auto r3 = const auto r3 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(view),
KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view), KE::cend(view), init_value, args...);
init_value, std::forward<Args>(args)...);
const auto r4 = KE::transform_reduce("MYLABEL", ExecutionSpace(), const auto r4 =
KE::cbegin(view), KE::cend(view), KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view),
init_value, std::forward<Args>(args)...); KE::cend(view), init_value, args...);
const auto r5 = KE::transform_reduce(ExecutionSpace(), view, init_value, const auto r5 =
std::forward<Args>(args)...); KE::transform_reduce(ExecutionSpace(), view, init_value, args...);
const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), view, const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), view,
init_value, std::forward<Args>(args)...); init_value, args...);
ASSERT_EQ(r3, result_value); ASSERT_EQ(r3, result_value);
ASSERT_EQ(r4, result_value); ASSERT_EQ(r4, result_value);

View File

@ -196,7 +196,7 @@ void run_single_scenario(const InfoType& scenario_info,
// create host copy BEFORE rotate or view will be modified // create host copy BEFORE rotate or view will be modified
auto view_h = create_host_space_copy(view); auto view_h = create_host_space_copy(view);
auto rit = KE::rotate(exespace(), view, rotation_point); auto rit = KE::rotate(exespace(), view, rotation_point);
// verify_data(rit, view, view_h, rotation_point); verify_data(rit, view, view_h, rotation_point);
} }
{ {

View File

@ -191,6 +191,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
ASSERT_EQ(stdDistance, distancesView_h(i)); ASSERT_EQ(stdDistance, distancesView_h(i));
break; break;
} }
default: Kokkos::abort("unreachable");
} }
} }

View File

@ -217,6 +217,7 @@ void test_A(const bool ensureAdjacentFindCanFind, std::size_t numTeams,
break; break;
} }
default: Kokkos::abort("unreachable");
} }
} }
} }

View File

@ -244,6 +244,7 @@ void test_A(const bool viewsAreEqual, std::size_t numTeams, std::size_t numCols,
break; break;
} }
default: Kokkos::abort("unreachable");
} }
} }
} }

View File

@ -224,6 +224,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
break; break;
} }
#endif #endif
default: Kokkos::abort("unreachable");
} }
#undef exclusive_scan #undef exclusive_scan

View File

@ -227,6 +227,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams,
break; break;
} }
default: Kokkos::abort("unreachable");
} }
if (sequencesExist) { if (sequencesExist) {

View File

@ -244,6 +244,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams,
break; break;
} }
default: Kokkos::abort("unreachable");
} }
} }
} }

View File

@ -57,14 +57,7 @@ struct TestFunctorA {
const auto myRowIndex = member.league_rank(); const auto myRowIndex = member.league_rank();
auto myRowViewFrom = Kokkos::subview(m_dataView, myRowIndex, Kokkos::ALL()); auto myRowViewFrom = Kokkos::subview(m_dataView, myRowIndex, Kokkos::ALL());
const auto val = m_greaterThanValuesView(myRowIndex); const auto val = m_greaterThanValuesView(myRowIndex);
// FIXME_INTEL
#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL)
GreaterEqualFunctor<
typename GreaterThanValuesViewType::non_const_value_type>
unaryPred{val};
#else
GreaterEqualFunctor unaryPred{val}; GreaterEqualFunctor unaryPred{val};
#endif
ptrdiff_t resultDist = 0; ptrdiff_t resultDist = 0;
switch (m_apiPick) { switch (m_apiPick) {
@ -185,12 +178,7 @@ void test_A(const bool predicatesReturnTrue, std::size_t numTeams,
const auto rowFromBegin = KE::cbegin(rowFrom); const auto rowFromBegin = KE::cbegin(rowFrom);
const auto rowFromEnd = KE::cend(rowFrom); const auto rowFromEnd = KE::cend(rowFrom);
const auto val = greaterEqualValuesView_h(i); const auto val = greaterEqualValuesView_h(i);
// FIXME_INTEL
#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL)
const GreaterEqualFunctor<ValueType> unaryPred{val};
#else
const GreaterEqualFunctor unaryPred{val}; const GreaterEqualFunctor unaryPred{val};
#endif
auto it = std::find_if(rowFromBegin, rowFromEnd, unaryPred); auto it = std::find_if(rowFromBegin, rowFromEnd, unaryPred);

View File

@ -57,14 +57,7 @@ struct TestFunctorA {
const auto myRowIndex = member.league_rank(); const auto myRowIndex = member.league_rank();
auto myRowViewFrom = Kokkos::subview(m_dataView, myRowIndex, Kokkos::ALL()); auto myRowViewFrom = Kokkos::subview(m_dataView, myRowIndex, Kokkos::ALL());
const auto val = m_greaterThanValuesView(myRowIndex); const auto val = m_greaterThanValuesView(myRowIndex);
// FIXME_INTEL
#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL)
GreaterEqualFunctor<
typename GreaterThanValuesViewType::non_const_value_type>
unaryPred{val};
#else
GreaterEqualFunctor unaryPred{val}; GreaterEqualFunctor unaryPred{val};
#endif
ptrdiff_t resultDist = 0; ptrdiff_t resultDist = 0;
switch (m_apiPick) { switch (m_apiPick) {
@ -180,12 +173,7 @@ void test_A(const bool predicatesReturnTrue, std::size_t numTeams,
const auto rowFromBegin = KE::cbegin(rowFrom); const auto rowFromBegin = KE::cbegin(rowFrom);
const auto rowFromEnd = KE::cend(rowFrom); const auto rowFromEnd = KE::cend(rowFrom);
const auto val = greaterEqualValuesView_h(i); const auto val = greaterEqualValuesView_h(i);
// FIXME_INTEL
#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL)
const GreaterEqualFunctor<ValueType> unaryPred{val};
#else
const GreaterEqualFunctor unaryPred{val}; const GreaterEqualFunctor unaryPred{val};
#endif
auto it = std::find_if_not(rowFromBegin, rowFromEnd, unaryPred); auto it = std::find_if_not(rowFromBegin, rowFromEnd, unaryPred);

View File

@ -253,6 +253,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
break; break;
} }
default: Kokkos::abort("unreachable");
} }
#undef inclusive_scan #undef inclusive_scan

View File

@ -245,6 +245,7 @@ void test_A(const TestCaseType testCase, std::size_t numTeams,
break; break;
} }
default: Kokkos::abort("unreachable");
} }
} }
} }

View File

@ -249,6 +249,7 @@ void test_A(const bool viewsAreEqual, std::size_t numTeams, std::size_t numCols,
break; break;
} }
default: Kokkos::abort("unreachable");
} }
} }
} }

View File

@ -242,6 +242,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
break; break;
} }
default: Kokkos::abort("unreachable");
} }
#undef reduce #undef reduce

View File

@ -243,6 +243,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams,
break; break;
} }
default: Kokkos::abort("unreachable");
} }
} }
} }

View File

@ -258,6 +258,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams,
break; break;
} }
default: Kokkos::abort("unreachable");
} }
} }
} }

View File

@ -203,6 +203,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
ASSERT_EQ(stdDistance, distancesView_h(i)); ASSERT_EQ(stdDistance, distancesView_h(i));
break; break;
} }
default: Kokkos::abort("unreachable");
} }
#undef transform_exclusive_scan #undef transform_exclusive_scan

View File

@ -240,6 +240,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
break; break;
} }
default: Kokkos::abort("unreachable");
} }
} }
#undef transform_inclusive_scan #undef transform_inclusive_scan

View File

@ -293,6 +293,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
break; break;
} }
default: Kokkos::abort("unreachable");
} }
#undef transform_reduce #undef transform_reduce

View File

@ -344,8 +344,7 @@ TEST(std_algorithms_numeric_ops_test, transform_exclusive_scan_functor) {
using view_type = Kokkos::View<int*, exespace>; using view_type = Kokkos::View<int*, exespace>;
view_type dummy_view("dummy_view", 0); view_type dummy_view("dummy_view", 0);
using unary_op_type = using unary_op_type =
Kokkos::Experimental::Impl::StdNumericScanIdentityReferenceUnaryFunctor< Kokkos::Experimental::Impl::StdNumericScanIdentityReferenceUnaryFunctor;
int>;
using functor_type = using functor_type =
Kokkos::Experimental::Impl::TransformExclusiveScanFunctorWithValueWrapper< Kokkos::Experimental::Impl::TransformExclusiveScanFunctorWithValueWrapper<
exespace, int, int, view_type, view_type, MultiplyFunctor<int>, exespace, int, int, view_type, view_type, MultiplyFunctor<int>,

View File

@ -390,8 +390,7 @@ TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan_functor) {
int dummy = 0; int dummy = 0;
using view_type = Kokkos::View<int*, exespace>; using view_type = Kokkos::View<int*, exespace>;
view_type dummy_view("dummy_view", 0); view_type dummy_view("dummy_view", 0);
using unary_op_type = using unary_op_type = KE::Impl::StdNumericScanIdentityReferenceUnaryFunctor;
KE::Impl::StdNumericScanIdentityReferenceUnaryFunctor<int>;
{ {
using functor_type = using functor_type =
KE::Impl::ExeSpaceTransformInclusiveScanNoInitValueFunctor< KE::Impl::ExeSpaceTransformInclusiveScanNoInitValueFunctor<

View File

@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda
KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_CUDA_OPTIONS=enable_lambda
KOKKOS_ARCH = "SNB,Volta70" KOKKOS_ARCH = "SNB,Volta70"
KOKKOS_USE_DEPRECATED_MAKEFILES=1
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))

View File

@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda
KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_CUDA_OPTIONS=enable_lambda
KOKKOS_ARCH = "SNB,Volta70" KOKKOS_ARCH = "SNB,Volta70"
KOKKOS_USE_DEPRECATED_MAKEFILES=1
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))

View File

@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda
KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_CUDA_OPTIONS=enable_lambda
KOKKOS_ARCH = "SNB,Volta70" KOKKOS_ARCH = "SNB,Volta70"
KOKKOS_USE_DEPRECATED_MAKEFILES=1
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))

View File

@ -37,7 +37,7 @@
template <int V> template <int V>
struct TestFunctor { struct TestFunctor {
double values[V]; double values[V] = {};
Kokkos::View<double*> a; Kokkos::View<double*> a;
int K; int K;
TestFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {} TestFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
@ -50,7 +50,7 @@ struct TestFunctor {
template <int V> template <int V>
struct TestRFunctor { struct TestRFunctor {
double values[V]; double values[V] = {};
Kokkos::View<double*> a; Kokkos::View<double*> a;
int K; int K;
TestRFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {} TestRFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
@ -247,12 +247,15 @@ int main(int argc, char* argv[]) {
// anything that doesn't start with -- // anything that doesn't start with --
if (arg.size() < 2 || if (arg.size() < 2 ||
(arg.size() >= 2 && arg[0] != '-' && arg[1] != '-')) { (arg.size() >= 2 && arg[0] != '-' && arg[1] != '-')) {
// signing off that arg.data() is null terminated
// NOLINTBEGIN(bugprone-suspicious-stringview-data-usage)
if (i == 1) if (i == 1)
N = atoi(arg.data()); N = atoi(arg.data());
else if (i == 2) else if (i == 2)
M = atoi(arg.data()); M = atoi(arg.data());
else if (i == 3) else if (i == 3)
K = atoi(arg.data()); K = atoi(arg.data());
// NOLINTEND(bugprone-suspicious-stringview-data-usage)
else { else {
Kokkos::abort("unexpected argument!"); Kokkos::abort("unexpected argument!");
} }

View File

@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda
KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_CUDA_OPTIONS=enable_lambda
KOKKOS_ARCH = "SNB,Volta70" KOKKOS_ARCH = "SNB,Volta70"
KOKKOS_USE_DEPRECATED_MAKEFILES=1
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))

View File

@ -120,11 +120,12 @@ int main(int argc, char* argv[]) {
// view appropriately for test and should obey first-touch etc Second call to // view appropriately for test and should obey first-touch etc Second call to
// test is the one we actually care about and time // test is the one we actually care about and time
view_type_1d v_1(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_1"), view_type_1d v_1(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_1"),
team_range * team_size); static_cast<size_t>(team_range) * team_size);
view_type_2d v_2(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_2"), view_type_2d v_2(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_2"),
team_range * team_size, thread_range); static_cast<size_t>(team_range) * team_size, thread_range);
view_type_3d v_3(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_3"), view_type_3d v_3(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_3"),
team_range * team_size, thread_range, vector_range); static_cast<size_t>(team_range) * team_size, thread_range,
vector_range);
double result_computed = 0.0; double result_computed = 0.0;
double result_expect = 0.0; double result_expect = 0.0;

View File

@ -367,7 +367,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
// parallel_for RangePolicy: range = team_size*team_range // parallel_for RangePolicy: range = team_size*team_range
if (test_type == 300) { if (test_type == 300) {
Kokkos::parallel_for( Kokkos::parallel_for(
"300 outer for", team_size * team_range, "300 outer for", static_cast<size_t>(team_size) * team_range,
KOKKOS_LAMBDA(const int idx) { KOKKOS_LAMBDA(const int idx) {
v1(idx) = idx; v1(idx) = idx;
// prevent compiler from optimizing away the loop // prevent compiler from optimizing away the loop
@ -376,14 +376,15 @@ void test_policy(int team_range, int thread_range, int vector_range,
// parallel_reduce RangePolicy: range = team_size*team_range // parallel_reduce RangePolicy: range = team_size*team_range
if (test_type == 400) { if (test_type == 400) {
Kokkos::parallel_reduce( Kokkos::parallel_reduce(
"400 outer reduce", team_size * team_range, "400 outer reduce", static_cast<size_t>(team_size) * team_range,
KOKKOS_LAMBDA(const int idx, double& val) { val += idx; }, result); KOKKOS_LAMBDA(const int idx, double& val) { val += idx; }, result);
result_expect = result_expect =
0.5 * (team_size * team_range) * (team_size * team_range - 1); 0.5 * (team_size * team_range) * (team_size * team_range - 1);
} }
// parallel_scan RangePolicy: range = team_size*team_range // parallel_scan RangePolicy: range = team_size*team_range
if (test_type == 500) { if (test_type == 500) {
Kokkos::parallel_scan("500 outer scan", team_size * team_range, Kokkos::parallel_scan("500 outer scan",
static_cast<size_t>(team_size) * team_range,
ParallelScanFunctor<ViewType1>(v1) ParallelScanFunctor<ViewType1>(v1)
#if 0 #if 0
// This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation // This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation

View File

@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda
KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_CUDA_OPTIONS=enable_lambda
KOKKOS_ARCH = "SNB,Volta70" KOKKOS_ARCH = "SNB,Volta70"
KOKKOS_USE_DEPRECATED_MAKEFILES=1
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))

View File

@ -1,6 +1,7 @@
KOKKOS_DEVICES=Serial KOKKOS_DEVICES=Serial
KOKKOS_ARCH = "" KOKKOS_ARCH = ""
KOKKOS_USE_DEPRECATED_MAKEFILES=1
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))

View File

@ -317,7 +317,7 @@ do
# End of Werror handling # End of Werror handling
#Handle unsupported standard flags #Handle unsupported standard flags
--std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a) --std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a)
fallback_std_flag="-std=c++14" fallback_std_flag="-std=c++17"
# this is hopefully just occurring in a downstream project during CMake feature tests # this is hopefully just occurring in a downstream project during CMake feature tests
# we really have no choice here but to accept the flag and change to an accepted C++ standard # we really have no choice here but to accept the flag and change to an accepted C++ standard
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++17 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++17 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
@ -346,35 +346,17 @@ do
# NVCC only has C++20 from version 12 on # NVCC only has C++20 from version 12 on
cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]}) cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]})
if [ ${cuda_main_version} -lt 12 ]; then if [ ${cuda_main_version} -lt 12 ]; then
fallback_std_flag="-std=c++14" fallback_std_flag="-std=c++17"
# this is hopefully just occurring in a downstream project during CMake feature tests # this is hopefully just occurring in a downstream project during CMake feature tests
# we really have no choice here but to accept the flag and change to an accepted C++ standard # we really have no choice here but to accept the flag and change to an accepted C++ standard
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++17 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
std_flag=$fallback_std_flag std_flag=$fallback_std_flag
else else
std_flag=$1 std_flag=$1
fi fi
shared_args="$shared_args $std_flag" shared_args="$shared_args $std_flag"
;; ;;
--std=c++17|-std=c++17) --std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++17|-std=c++17)
if [ -n "$std_flag" ]; then
warn_std_flag
shared_args=${shared_args/ $std_flag/}
fi
# NVCC only has C++17 from version 11 on
cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]})
if [ ${cuda_main_version} -lt 11 ]; then
fallback_std_flag="-std=c++14"
# this is hopefully just occurring in a downstream project during CMake feature tests
# we really have no choice here but to accept the flag and change to an accepted C++ standard
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
std_flag=$fallback_std_flag
else
std_flag=$1
fi
shared_args="$shared_args $std_flag"
;;
--std=c++11|-std=c++11|--std=c++14|-std=c++14)
if [ -n "$std_flag" ]; then if [ -n "$std_flag" ]; then
warn_std_flag warn_std_flag
shared_args=${shared_args/ $std_flag/} shared_args=${shared_args/ $std_flag/}
@ -500,6 +482,20 @@ do
xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}" xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
host_linker_args="$host_linker_args ${1:4:${#1}}" host_linker_args="$host_linker_args ${1:4:${#1}}"
;; ;;
#Handle host assembler options
-Wa,*)
#To pass the -Wa options to the host compiler via -Xcompiler it is necessary
#to use '\\,' for each comma in the options. As users might already add escapes
#to the comma by themselves, the escapes are first removed and then only the
#required number of \ are added back.
xcompiler_args_wa=$(echo -e "$1" | sed -E 's/\\\+,/,/g' | sed -E 's/,/\\\\\\\,/g')
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args="$xcompiler_args_wa"
first_xcompiler_arg=0
else
xcompiler_args="$xcompiler_args,$xcompiler_args_wa"
fi
;;
#Handle object files: -x cu applies to all input files, so give them to linker, except if only linking #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
*.a|*.so|*.o|*.obj) *.a|*.so|*.o|*.obj)
object_files="$object_files $1" object_files="$object_files $1"

View File

@ -2,46 +2,49 @@
# loaded by include() and find_package() commands except when invoked with # loaded by include() and find_package() commands except when invoked with
# the NO_POLICY_SCOPE option # the NO_POLICY_SCOPE option
# CMP0057 + NEW -> IN_LIST operator in IF(...) # CMP0057 + NEW -> IN_LIST operator in IF(...)
CMAKE_POLICY(SET CMP0057 NEW) cmake_policy(SET CMP0057 NEW)
# Compute paths # Compute paths
@PACKAGE_INIT@ @PACKAGE_INIT@
#Find dependencies #Find dependencies
INCLUDE(CMakeFindDependencyMacro) include(CMakeFindDependencyMacro)
#This needs to go above the KokkosTargets in case #This needs to go above the KokkosTargets in case
#the Kokkos targets depend in some way on the TPL imports #the Kokkos targets depend in some way on the TPL imports
@KOKKOS_TPL_EXPORTS@ @KOKKOS_TPL_EXPORTS@
GET_FILENAME_COMPONENT(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) get_filename_component(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") include("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake")
INCLUDE("${Kokkos_CMAKE_DIR}/KokkosConfigCommon.cmake") include("${Kokkos_CMAKE_DIR}/KokkosConfigCommon.cmake")
UNSET(Kokkos_CMAKE_DIR) unset(Kokkos_CMAKE_DIR)
# check for conflicts # check for conflicts
IF("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS AND if("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS AND "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS)
"separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS) message(STATUS "'launch_compiler' implies global redirection of targets depending on Kokkos to appropriate compiler.")
MESSAGE(STATUS "'launch_compiler' implies global redirection of targets depending on Kokkos to appropriate compiler.") message(
MESSAGE(STATUS "'separable_compilation' implies explicitly defining where redirection occurs via 'kokkos_compilation(PROJECT|TARGET|SOURCE|DIRECTORY ...)'") STATUS
MESSAGE(FATAL_ERROR "Conflicting COMPONENTS: 'launch_compiler' and 'separable_compilation'") "'separable_compilation' implies explicitly defining where redirection occurs via 'kokkos_compilation(PROJECT|TARGET|SOURCE|DIRECTORY ...)'"
ENDIF() )
message(FATAL_ERROR "Conflicting COMPONENTS: 'launch_compiler' and 'separable_compilation'")
endif()
IF("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS) if("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS)
# #
# if find_package(Kokkos COMPONENTS launch_compiler) then rely on the # if find_package(Kokkos COMPONENTS launch_compiler) then rely on the
# RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK to always redirect to the # RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK to always redirect to the
# appropriate compiler for Kokkos # appropriate compiler for Kokkos
# #
MESSAGE(STATUS "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos") message(
kokkos_compilation( STATUS
GLOBAL "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos"
CHECK_CUDA_COMPILES) )
kokkos_compilation(GLOBAL CHECK_CUDA_COMPILES)
ELSEIF(@Kokkos_ENABLE_CUDA@ elseif(@Kokkos_ENABLE_CUDA@ AND NOT @KOKKOS_COMPILE_LANGUAGE@ STREQUAL CUDA AND NOT "separable_compilation" IN_LIST
AND NOT @KOKKOS_COMPILE_LANGUAGE@ STREQUAL CUDA Kokkos_FIND_COMPONENTS
AND NOT "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS) )
# #
# if CUDA was enabled, the compilation language was not set to CUDA, and separable compilation was not # if CUDA was enabled, the compilation language was not set to CUDA, and separable compilation was not
# specified, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and # specified, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and
@ -54,13 +57,16 @@ ELSEIF(@Kokkos_ENABLE_CUDA@
kokkos_compiler_is_nvcc(IS_NVCC ${CMAKE_CXX_COMPILER}) kokkos_compiler_is_nvcc(IS_NVCC ${CMAKE_CXX_COMPILER})
# if not nvcc_wrapper and Kokkos_LAUNCH_COMPILER was not set to OFF # if not nvcc_wrapper and Kokkos_LAUNCH_COMPILER was not set to OFF
IF(NOT IS_NVCC AND (NOT DEFINED Kokkos_LAUNCH_COMPILER OR Kokkos_LAUNCH_COMPILER)) if(NOT IS_NVCC AND (NOT DEFINED Kokkos_LAUNCH_COMPILER OR Kokkos_LAUNCH_COMPILER))
MESSAGE(STATUS "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos") message(
STATUS
"kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos"
)
kokkos_compilation(GLOBAL) kokkos_compilation(GLOBAL)
ENDIF() endif()
# be mindful of the environment, pollution is bad # be mindful of the environment, pollution is bad
UNSET(IS_NVCC) unset(IS_NVCC)
ENDIF() endif()
set(Kokkos_COMPILE_LANGUAGE @KOKKOS_COMPILE_LANGUAGE@) set(Kokkos_COMPILE_LANGUAGE @KOKKOS_COMPILE_LANGUAGE@)

View File

@ -1,67 +1,67 @@
SET(Kokkos_DEVICES @KOKKOS_ENABLED_DEVICES@) set(Kokkos_DEVICES @KOKKOS_ENABLED_DEVICES@)
SET(Kokkos_OPTIONS @KOKKOS_ENABLED_OPTIONS@) set(Kokkos_OPTIONS @KOKKOS_ENABLED_OPTIONS@)
SET(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@) set(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@)
SET(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@) set(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@)
SET(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@") set(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@")
SET(Kokkos_CXX_COMPILER_ID "@KOKKOS_CXX_COMPILER_ID@") set(Kokkos_CXX_COMPILER_ID "@KOKKOS_CXX_COMPILER_ID@")
SET(Kokkos_CXX_COMPILER_VERSION "@KOKKOS_CXX_COMPILER_VERSION@") set(Kokkos_CXX_COMPILER_VERSION "@KOKKOS_CXX_COMPILER_VERSION@")
SET(Kokkos_CXX_STANDARD @KOKKOS_CXX_STANDARD@) set(Kokkos_CXX_STANDARD @KOKKOS_CXX_STANDARD@)
# Required to be a TriBITS-compliant external package # Required to be a TriBITS-compliant external package
IF(NOT TARGET Kokkos::all_libs) if(NOT TARGET Kokkos::all_libs)
# CMake Error at <prefix>/lib/cmake/Kokkos/KokkosConfigCommon.cmake:10 (ADD_LIBRARY): # CMake Error at <prefix>/lib/cmake/Kokkos/KokkosConfigCommon.cmake:10 (ADD_LIBRARY):
# ADD_LIBRARY cannot create ALIAS target "Kokkos::all_libs" because target # ADD_LIBRARY cannot create ALIAS target "Kokkos::all_libs" because target
# "Kokkos::kokkos" is imported but not globally visible. # "Kokkos::kokkos" is imported but not globally visible.
IF(CMAKE_VERSION VERSION_LESS "3.18") if(CMAKE_VERSION VERSION_LESS "3.18")
SET_TARGET_PROPERTIES(Kokkos::kokkos PROPERTIES IMPORTED_GLOBAL ON) set_target_properties(Kokkos::kokkos PROPERTIES IMPORTED_GLOBAL ON)
ENDIF() endif()
ADD_LIBRARY(Kokkos::all_libs ALIAS Kokkos::kokkos) add_library(Kokkos::all_libs ALIAS Kokkos::kokkos)
ENDIF() endif()
# Export Kokkos_ENABLE_<BACKEND> for each backend that was enabled. # Export Kokkos_ENABLE_<BACKEND> for each backend that was enabled.
# NOTE: "Devices" is a little bit of a misnomer here. These are really # NOTE: "Devices" is a little bit of a misnomer here. These are really
# backends, e.g. Kokkos_ENABLE_OPENMP, Kokkos_ENABLE_CUDA, Kokkos_ENABLE_HIP, # backends, e.g. Kokkos_ENABLE_OPENMP, Kokkos_ENABLE_CUDA, Kokkos_ENABLE_HIP,
# or Kokkos_ENABLE_SYCL. # or Kokkos_ENABLE_SYCL.
FOREACH(DEV ${Kokkos_DEVICES}) foreach(DEV ${Kokkos_DEVICES})
SET(Kokkos_ENABLE_${DEV} ON) set(Kokkos_ENABLE_${DEV} ON)
ENDFOREACH() endforeach()
# Export relevant Kokkos_ENABLE<OPTION> variables, e.g. # Export relevant Kokkos_ENABLE<OPTION> variables, e.g.
# Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE, Kokkos_ENABLE_DEBUG, etc. # Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE, Kokkos_ENABLE_DEBUG, etc.
FOREACH(OPT ${Kokkos_OPTIONS}) foreach(OPT ${Kokkos_OPTIONS})
SET(Kokkos_ENABLE_${OPT} ON) set(Kokkos_ENABLE_${OPT} ON)
ENDFOREACH() endforeach()
IF(Kokkos_ENABLE_CUDA) if(Kokkos_ENABLE_CUDA)
SET(Kokkos_CUDA_ARCHITECTURES @KOKKOS_CUDA_ARCHITECTURES@) set(Kokkos_CUDA_ARCHITECTURES @KOKKOS_CUDA_ARCHITECTURES@)
ENDIF() endif()
IF(Kokkos_ENABLE_HIP) if(Kokkos_ENABLE_HIP)
SET(Kokkos_HIP_ARCHITECTURES @KOKKOS_HIP_ARCHITECTURES@) set(Kokkos_HIP_ARCHITECTURES @KOKKOS_HIP_ARCHITECTURES@)
ENDIF() endif()
IF(NOT Kokkos_FIND_QUIETLY) if(NOT Kokkos_FIND_QUIETLY)
MESSAGE(STATUS "Enabled Kokkos devices: ${Kokkos_DEVICES}") message(STATUS "Enabled Kokkos devices: ${Kokkos_DEVICES}")
ENDIF() endif()
IF (Kokkos_ENABLE_CUDA) if(Kokkos_ENABLE_CUDA)
# If we are building CUDA, we have tricked CMake because we declare a CXX project # If we are building CUDA, we have tricked CMake because we declare a CXX project
# If the default C++ standard for a given compiler matches the requested # If the default C++ standard for a given compiler matches the requested
# standard, then CMake just omits the -std flag in later versions of CMake # standard, then CMake just omits the -std flag in later versions of CMake
# This breaks CUDA compilation (CUDA compiler can have a different default # This breaks CUDA compilation (CUDA compiler can have a different default
# -std then the underlying host compiler by itself). Setting this variable # -std then the underlying host compiler by itself). Setting this variable
# forces CMake to always add the -std flag even if it thinks it doesn't need it # forces CMake to always add the -std flag even if it thinks it doesn't need it
SET(CMAKE_CXX_STANDARD_DEFAULT 98 CACHE INTERNAL "" FORCE) set(CMAKE_CXX_STANDARD_DEFAULT 98 CACHE INTERNAL "" FORCE)
ENDIF() endif()
SET(KOKKOS_USE_CXX_EXTENSIONS @KOKKOS_USE_CXX_EXTENSIONS@) set(KOKKOS_USE_CXX_EXTENSIONS @KOKKOS_USE_CXX_EXTENSIONS@)
IF (NOT DEFINED CMAKE_CXX_EXTENSIONS OR CMAKE_CXX_EXTENSIONS) if(NOT DEFINED CMAKE_CXX_EXTENSIONS OR CMAKE_CXX_EXTENSIONS)
IF (NOT KOKKOS_USE_CXX_EXTENSIONS) if(NOT KOKKOS_USE_CXX_EXTENSIONS)
MESSAGE(WARNING "The installed Kokkos configuration does not support CXX extensions. Forcing -DCMAKE_CXX_EXTENSIONS=Off") message(
SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "" FORCE) WARNING "The installed Kokkos configuration does not support CXX extensions. Forcing -DCMAKE_CXX_EXTENSIONS=Off"
ENDIF() )
ENDIF() set(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "" FORCE)
endif()
include(FindPackageHandleStandardArgs) endif()
# This function makes sure that Kokkos was built with the requested backends # This function makes sure that Kokkos was built with the requested backends
# and target architectures and generates a fatal error if it was not. # and target architectures and generates a fatal error if it was not.
@ -89,30 +89,25 @@ function(kokkos_check)
endforeach() endforeach()
set(KOKKOS_CHECK_SUCCESS TRUE) set(KOKKOS_CHECK_SUCCESS TRUE)
foreach(arg ${REQUESTED_ARGS}) foreach(arg ${REQUESTED_ARGS})
# Define variables named after the required arguments that are provided by set(MISSING_OPTIONS "")
# the Kokkos install.
foreach(requested ${KOKKOS_CHECK_${arg}}) foreach(requested ${KOKKOS_CHECK_${arg}})
set(FOUND_MATCHING_OPTION FALSE)
foreach(provided ${Kokkos_${arg}}) foreach(provided ${Kokkos_${arg}})
STRING(TOUPPER ${requested} REQUESTED_UC) string(TOUPPER ${requested} REQUESTED_UC)
STRING(TOUPPER ${provided} PROVIDED_UC) string(TOUPPER ${provided} PROVIDED_UC)
if(PROVIDED_UC STREQUAL REQUESTED_UC) if(PROVIDED_UC STREQUAL REQUESTED_UC)
string(REPLACE ";" " " ${requested} "${KOKKOS_CHECK_${arg}}") set(FOUND_MATCHING_OPTION TRUE)
endif() endif()
endforeach() endforeach()
endforeach() if(NOT FOUND_MATCHING_OPTION)
# Somewhat divert the CMake function below from its original purpose and list(APPEND MISSING_OPTIONS ${requested})
# use it to check that there are variables defined for all required
# arguments. Success or failure messages will be displayed but we are
# responsible for signaling failure and skip the build system generation.
if (KOKKOS_CHECK_RETURN_VALUE)
set(Kokkos_${arg}_FIND_QUIETLY ON)
endif()
find_package_handle_standard_args("Kokkos_${arg}" DEFAULT_MSG
${KOKKOS_CHECK_${arg}})
if(NOT Kokkos_${arg}_FOUND)
set(KOKKOS_CHECK_SUCCESS FALSE) set(KOKKOS_CHECK_SUCCESS FALSE)
endif() endif()
endforeach() endforeach()
if(NOT KOKKOS_CHECK_SUCCESS AND NOT KOKKOS_CHECK_RETURN_VALUE)
message(STATUS "Could NOT find Kokkos_${arg} (missing: ${MISSING_OPTIONS})")
endif()
endforeach()
if(NOT KOKKOS_CHECK_SUCCESS AND NOT KOKKOS_CHECK_RETURN_VALUE) if(NOT KOKKOS_CHECK_SUCCESS AND NOT KOKKOS_CHECK_RETURN_VALUE)
message(FATAL_ERROR "Kokkos does NOT provide all backends and/or architectures requested") message(FATAL_ERROR "Kokkos does NOT provide all backends and/or architectures requested")
else() else()
@ -122,31 +117,34 @@ endfunction()
# A test to check whether a downstream project set the C++ compiler to NVCC or not # A test to check whether a downstream project set the C++ compiler to NVCC or not
# this is called only when Kokkos was installed with Kokkos_ENABLE_CUDA=ON # this is called only when Kokkos was installed with Kokkos_ENABLE_CUDA=ON
FUNCTION(kokkos_compiler_is_nvcc VAR COMPILER) function(kokkos_compiler_is_nvcc VAR COMPILER)
# Check if the compiler is nvcc (which really means nvcc_wrapper). # Check if the compiler is nvcc (which really means nvcc_wrapper).
EXECUTE_PROCESS(COMMAND ${COMPILER} ${ARGN} --version execute_process(
COMMAND ${COMPILER} ${ARGN} --version
OUTPUT_VARIABLE INTERNAL_COMPILER_VERSION OUTPUT_VARIABLE INTERNAL_COMPILER_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_STRIP_TRAILING_WHITESPACE
RESULT_VARIABLE RET) RESULT_VARIABLE RET
)
# something went wrong # something went wrong
IF(RET GREATER 0) if(RET GREATER 0)
SET(${VAR} false PARENT_SCOPE) set(${VAR} false PARENT_SCOPE)
ELSE() else()
STRING(REPLACE "\n" " - " INTERNAL_COMPILER_VERSION_ONE_LINE ${INTERNAL_COMPILER_VERSION} ) string(REPLACE "\n" " - " INTERNAL_COMPILER_VERSION_ONE_LINE ${INTERNAL_COMPILER_VERSION})
STRING(FIND ${INTERNAL_COMPILER_VERSION_ONE_LINE} "nvcc" INTERNAL_COMPILER_VERSION_CONTAINS_NVCC) string(FIND ${INTERNAL_COMPILER_VERSION_ONE_LINE} "nvcc" INTERNAL_COMPILER_VERSION_CONTAINS_NVCC)
STRING(REGEX REPLACE "^ +" "" INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}") string(REGEX REPLACE "^ +" "" INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")
IF(${INTERNAL_COMPILER_VERSION_CONTAINS_NVCC} GREATER -1) if(${INTERNAL_COMPILER_VERSION_CONTAINS_NVCC} GREATER -1)
SET(${VAR} true PARENT_SCOPE) set(${VAR} true PARENT_SCOPE)
ELSE() else()
SET(${VAR} false PARENT_SCOPE) set(${VAR} false PARENT_SCOPE)
ENDIF() endif()
ENDIF() endif()
ENDFUNCTION() endfunction()
# this function checks whether the current CXX compiler supports building CUDA # this function checks whether the current CXX compiler supports building CUDA
FUNCTION(kokkos_cxx_compiler_cuda_test _VAR _COMPILER) function(kokkos_cxx_compiler_cuda_test _VAR _COMPILER)
FILE(WRITE ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu file(
WRITE ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu
" "
#include <cuda.h> #include <cuda.h>
#include <cstdlib> #include <cstdlib>
@ -171,34 +169,39 @@ int main()
cudaDeviceSynchronize(); cudaDeviceSynchronize();
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
") "
)
# save the command for debugging # save the command for debugging
SET(_COMMANDS "${_COMPILER} ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu") set(_COMMANDS "${_COMPILER} ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu")
# use execute_process instead of try compile because we want to set custom compiler # use execute_process instead of try compile because we want to set custom compiler
EXECUTE_PROCESS(COMMAND ${_COMPILER} ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu execute_process(
COMMAND ${_COMPILER} ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu
RESULT_VARIABLE _RET RESULT_VARIABLE _RET
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/compile_tests WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/compile_tests
TIMEOUT 15 TIMEOUT 15
OUTPUT_QUIET OUTPUT_QUIET ERROR_QUIET
ERROR_QUIET) )
IF(NOT _RET EQUAL 0) if(NOT _RET EQUAL 0)
# save the command for debugging # save the command for debugging
SET(_COMMANDS "${_COMMAND}\n${_COMPILER} --cuda-gpu-arch=sm_35 ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu") set(_COMMANDS
"${_COMMAND}\n${_COMPILER} --cuda-gpu-arch=sm_35 ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu"
)
# try the compile test again with clang arguments # try the compile test again with clang arguments
EXECUTE_PROCESS(COMMAND ${_COMPILER} --cuda-gpu-arch=sm_35 -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu execute_process(
COMMAND ${_COMPILER} --cuda-gpu-arch=sm_35 -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu
RESULT_VARIABLE _RET RESULT_VARIABLE _RET
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/compile_tests WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/compile_tests
TIMEOUT 15 TIMEOUT 15
OUTPUT_QUIET OUTPUT_QUIET ERROR_QUIET
ERROR_QUIET) )
ENDIF() endif()
SET(${_VAR}_COMMANDS "${_COMMANDS}" PARENT_SCOPE) set(${_VAR}_COMMANDS "${_COMMANDS}" PARENT_SCOPE)
SET(${_VAR} ${_RET} PARENT_SCOPE) set(${_VAR} ${_RET} PARENT_SCOPE)
ENDFUNCTION() endfunction()
# this function is provided to easily select which files use the same compiler as Kokkos # this function is provided to easily select which files use the same compiler as Kokkos
# when it was installed (or nvcc_wrapper): # when it was installed (or nvcc_wrapper):
@ -215,94 +218,107 @@ ENDFUNCTION()
# #
# Use CHECK_CUDA_COMPILES to run a check when CUDA is enabled # Use CHECK_CUDA_COMPILES to run a check when CUDA is enabled
# #
FUNCTION(kokkos_compilation) function(kokkos_compilation)
CMAKE_PARSE_ARGUMENTS(COMP cmake_parse_arguments(
"GLOBAL;PROJECT;CHECK_CUDA_COMPILES" COMP "GLOBAL;PROJECT;CHECK_CUDA_COMPILES" "COMPILER" "DIRECTORY;TARGET;SOURCE;COMMAND_PREFIX" ${ARGN}
"COMPILER" )
"DIRECTORY;TARGET;SOURCE;COMMAND_PREFIX"
${ARGN})
# if built w/o CUDA support, we want to basically make this a no-op # if built w/o CUDA support, we want to basically make this a no-op
SET(_Kokkos_ENABLE_CUDA @Kokkos_ENABLE_CUDA@) set(_Kokkos_ENABLE_CUDA @Kokkos_ENABLE_CUDA@)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17)
IF(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17) set(MAYBE_CURRENT_INSTALLATION_ROOT "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../..")
SET(MAYBE_CURRENT_INSTALLATION_ROOT "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../..") endif()
ENDIF()
# search relative first and then absolute # search relative first and then absolute
SET(_HINTS "${MAYBE_CURRENT_INSTALLATION_ROOT}" "@CMAKE_INSTALL_PREFIX@") set(_HINTS "${MAYBE_CURRENT_INSTALLATION_ROOT}" "@CMAKE_INSTALL_PREFIX@")
# find kokkos_launch_compiler # find kokkos_launch_compiler
FIND_PROGRAM(Kokkos_COMPILE_LAUNCHER find_program(
Kokkos_COMPILE_LAUNCHER
NAMES kokkos_launch_compiler NAMES kokkos_launch_compiler
HINTS ${_HINTS} HINTS ${_HINTS}
PATHS ${_HINTS} PATHS ${_HINTS}
PATH_SUFFIXES bin) PATH_SUFFIXES bin
)
IF(NOT Kokkos_COMPILE_LAUNCHER) if(NOT Kokkos_COMPILE_LAUNCHER)
MESSAGE(FATAL_ERROR "Kokkos could not find 'kokkos_launch_compiler'. Please set '-DKokkos_COMPILE_LAUNCHER=/path/to/launcher'") message(
ENDIF() FATAL_ERROR
"Kokkos could not find 'kokkos_launch_compiler'. Please set '-DKokkos_COMPILE_LAUNCHER=/path/to/launcher'"
)
endif()
# if COMPILER was not specified, assume Kokkos_CXX_COMPILER # if COMPILER was not specified, assume Kokkos_CXX_COMPILER
IF(NOT COMP_COMPILER) if(NOT COMP_COMPILER)
SET(COMP_COMPILER ${Kokkos_CXX_COMPILER}) set(COMP_COMPILER ${Kokkos_CXX_COMPILER})
IF(_Kokkos_ENABLE_CUDA AND Kokkos_CXX_COMPILER_ID STREQUAL NVIDIA) if(_Kokkos_ENABLE_CUDA AND Kokkos_CXX_COMPILER_ID STREQUAL NVIDIA)
# find nvcc_wrapper # find nvcc_wrapper
FIND_PROGRAM(Kokkos_NVCC_WRAPPER find_program(
Kokkos_NVCC_WRAPPER
NAMES nvcc_wrapper NAMES nvcc_wrapper
HINTS ${_HINTS} HINTS ${_HINTS}
PATHS ${_HINTS} PATHS ${_HINTS}
PATH_SUFFIXES bin) PATH_SUFFIXES bin
)
# fatal if we can't nvcc_wrapper # fatal if we can't nvcc_wrapper
IF(NOT Kokkos_NVCC_WRAPPER) if(NOT Kokkos_NVCC_WRAPPER)
MESSAGE(FATAL_ERROR "Kokkos could not find nvcc_wrapper. Please set '-DKokkos_NVCC_WRAPPER=/path/to/nvcc_wrapper'") message(
ENDIF() FATAL_ERROR "Kokkos could not find nvcc_wrapper. Please set '-DKokkos_NVCC_WRAPPER=/path/to/nvcc_wrapper'"
SET(COMP_COMPILER ${Kokkos_NVCC_WRAPPER}) )
ENDIF() endif()
ENDIF() set(COMP_COMPILER ${Kokkos_NVCC_WRAPPER})
endif()
endif()
# check that the original compiler still exists! # check that the original compiler still exists!
IF(NOT EXISTS ${COMP_COMPILER}) if(NOT EXISTS ${COMP_COMPILER})
MESSAGE(FATAL_ERROR "Kokkos could not find original compiler: '${COMP_COMPILER}'") message(FATAL_ERROR "Kokkos could not find original compiler: '${COMP_COMPILER}'")
ENDIF() endif()
# try to ensure that compiling cuda code works! # try to ensure that compiling cuda code works!
IF(_Kokkos_ENABLE_CUDA AND COMP_CHECK_CUDA_COMPILES) if(_Kokkos_ENABLE_CUDA AND COMP_CHECK_CUDA_COMPILES)
# this may fail if kokkos_compiler launcher was used during install # this may fail if kokkos_compiler launcher was used during install
kokkos_cxx_compiler_cuda_test(_COMPILES_CUDA kokkos_cxx_compiler_cuda_test(_COMPILES_CUDA ${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER})
${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER})
# if above failed, throw an error # if above failed, throw an error
IF(NOT _COMPILES_CUDA) if(NOT _COMPILES_CUDA)
MESSAGE(FATAL_ERROR "kokkos_cxx_compiler_cuda_test failed! Test commands:\n${_COMPILES_CUDA_COMMANDS}") message(FATAL_ERROR "kokkos_cxx_compiler_cuda_test failed! Test commands:\n${_COMPILES_CUDA_COMMANDS}")
ENDIF() endif()
ENDIF() endif()
IF(COMP_COMMAND_PREFIX) if(COMP_COMMAND_PREFIX)
SET(_PREFIX "${COMP_COMMAND_PREFIX}") set(_PREFIX "${COMP_COMMAND_PREFIX}")
STRING(REPLACE ";" " " _PREFIX "${COMP_COMMAND_PREFIX}") string(REPLACE ";" " " _PREFIX "${COMP_COMMAND_PREFIX}")
SET(Kokkos_COMPILER_LAUNCHER "${_PREFIX} ${Kokkos_COMPILE_LAUNCHER}") set(Kokkos_COMPILER_LAUNCHER "${_PREFIX} ${Kokkos_COMPILE_LAUNCHER}")
ENDIF() endif()
IF(COMP_GLOBAL) if(COMP_GLOBAL)
# if global, don't bother setting others # if global, don't bother setting others
SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}") set_property(
SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}") GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}"
ELSE() )
FOREACH(_TYPE PROJECT DIRECTORY TARGET SOURCE) set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}")
else()
foreach(_TYPE PROJECT DIRECTORY TARGET SOURCE)
# make project/subproject scoping easy, e.g. KokkosCompilation(PROJECT) after project(...) # make project/subproject scoping easy, e.g. KokkosCompilation(PROJECT) after project(...)
IF("${_TYPE}" STREQUAL "PROJECT" AND COMP_${_TYPE}) if("${_TYPE}" STREQUAL "PROJECT" AND COMP_${_TYPE})
LIST(APPEND COMP_DIRECTORY ${PROJECT_SOURCE_DIR}) list(APPEND COMP_DIRECTORY ${PROJECT_SOURCE_DIR})
UNSET(COMP_${_TYPE}) unset(COMP_${_TYPE})
ENDIF() endif()
# set the properties if defined # set the properties if defined
IF(COMP_${_TYPE}) if(COMP_${_TYPE})
# MESSAGE(STATUS "Using ${COMP_COMPILER} :: ${_TYPE} :: ${COMP_${_TYPE}}") # MESSAGE(STATUS "Using ${COMP_COMPILER} :: ${_TYPE} :: ${COMP_${_TYPE}}")
SET_PROPERTY(${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}") set_property(
SET_PROPERTY(${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}") ${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_COMPILE
ENDIF() "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}"
ENDFOREACH() )
ENDIF() set_property(
ENDFUNCTION() ${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_LINK
"${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}"
)
endif()
endforeach()
endif()
endfunction()

View File

@ -9,7 +9,9 @@
// KOKKOS_VERSION % 100 is the patch level // KOKKOS_VERSION % 100 is the patch level
// KOKKOS_VERSION / 100 % 100 is the minor version // KOKKOS_VERSION / 100 % 100 is the minor version
// KOKKOS_VERSION / 10000 is the major version // KOKKOS_VERSION / 10000 is the major version
#define KOKKOS_VERSION @KOKKOS_VERSION@ #cmakedefine KOKKOS_VERSION @KOKKOS_VERSION@
// Not using #cmakedefine below because a "0" FOO version number
// yields /* undef KOKKOS_VERSION_FOO */
#define KOKKOS_VERSION_MAJOR @KOKKOS_VERSION_MAJOR@ #define KOKKOS_VERSION_MAJOR @KOKKOS_VERSION_MAJOR@
#define KOKKOS_VERSION_MINOR @KOKKOS_VERSION_MINOR@ #define KOKKOS_VERSION_MINOR @KOKKOS_VERSION_MINOR@
#define KOKKOS_VERSION_PATCH @KOKKOS_VERSION_PATCH@ #define KOKKOS_VERSION_PATCH @KOKKOS_VERSION_PATCH@
@ -116,6 +118,7 @@
#cmakedefine KOKKOS_ARCH_AMD_ZEN #cmakedefine KOKKOS_ARCH_AMD_ZEN
#cmakedefine KOKKOS_ARCH_AMD_ZEN2 #cmakedefine KOKKOS_ARCH_AMD_ZEN2
#cmakedefine KOKKOS_ARCH_AMD_ZEN3 #cmakedefine KOKKOS_ARCH_AMD_ZEN3
#cmakedefine KOKKOS_ARCH_AMD_ZEN4
#cmakedefine KOKKOS_ARCH_AMD_GFX906 #cmakedefine KOKKOS_ARCH_AMD_GFX906
#cmakedefine KOKKOS_ARCH_AMD_GFX908 #cmakedefine KOKKOS_ARCH_AMD_GFX908
#cmakedefine KOKKOS_ARCH_AMD_GFX90A #cmakedefine KOKKOS_ARCH_AMD_GFX90A

View File

@ -11,9 +11,16 @@ if(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC AND CMAKE_VERSION VERSION_LESS "3.
message(FATAL_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1") message(FATAL_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1")
endif() endif()
set(TPL_CUDA_LIBRARIES "")
if(KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
set(TPL_CUDA_LIBRARIES CUDA::cuda_driver)
else()
set(TPL_CUDA_LIBRARIES CUDA::cuda_driver CUDA::cudart)
endif()
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0") if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0")
find_package(CUDAToolkit REQUIRED) find_package(CUDAToolkit REQUIRED)
kokkos_create_imported_tpl(CUDA INTERFACE LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart) kokkos_create_imported_tpl(CUDA INTERFACE LINK_LIBRARIES ${TPL_CUDA_LIBRARIES})
kokkos_export_cmake_tpl(CUDAToolkit REQUIRED) kokkos_export_cmake_tpl(CUDAToolkit REQUIRED)
else() else()
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake) include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
@ -33,8 +40,8 @@ else()
endif() endif()
include(FindPackageHandleStandardArgs) include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(TPLCUDA ${DEFAULT_MSG} FOUND_CUDART FOUND_CUDA_DRIVER) find_package_handle_standard_args(TPLCUDA ${DEFAULT_MSG} FOUND_CUDA_DRIVER FOUND_CUDART)
if(FOUND_CUDA_DRIVER AND FOUND_CUDART) if(FOUND_CUDA_DRIVER AND FOUND_CUDART)
kokkos_create_imported_tpl(CUDA INTERFACE LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart) kokkos_create_imported_tpl(CUDA INTERFACE LINK_LIBRARIES ${TPL_CUDA_LIBRARIES})
endif() endif()
endif() endif()

View File

@ -1,15 +0,0 @@
function(kokkos_set_intel_flags full_standard int_standard)
string(TOLOWER ${full_standard} FULL_LC_STANDARD)
string(TOLOWER ${int_standard} INT_LC_STANDARD)
# The following three blocks of code were copied from
# /Modules/Compiler/Intel-CXX.cmake from CMake 3.18.1 and then modified.
if(CMAKE_CXX_SIMULATE_ID STREQUAL MSVC)
set(_std -Qstd)
set(_ext c++)
else()
set(_std -std)
set(_ext gnu++)
endif()
set(KOKKOS_CXX_STANDARD_FLAG "${_std}=c++${FULL_LC_STANDARD}" PARENT_SCOPE)
set(KOKKOS_CXX_INTERMDIATE_STANDARD_FLAG "${_std}=${_ext}${INT_LC_STANDARD}" PARENT_SCOPE)
endfunction()

View File

@ -67,6 +67,7 @@ declare_and_check_host_arch(POWER9 "IBM POWER9 CPUs")
declare_and_check_host_arch(ZEN "AMD Zen architecture") declare_and_check_host_arch(ZEN "AMD Zen architecture")
declare_and_check_host_arch(ZEN2 "AMD Zen2 architecture") declare_and_check_host_arch(ZEN2 "AMD Zen2 architecture")
declare_and_check_host_arch(ZEN3 "AMD Zen3 architecture") declare_and_check_host_arch(ZEN3 "AMD Zen3 architecture")
declare_and_check_host_arch(ZEN4 "AMD Zen4 architecture")
declare_and_check_host_arch(RISCV_SG2042 "SG2042 (RISC-V) CPUs") declare_and_check_host_arch(RISCV_SG2042 "SG2042 (RISC-V) CPUs")
declare_and_check_host_arch(RISCV_RVA22V "RVA22V (RISC-V) CPUs") declare_and_check_host_arch(RISCV_RVA22V "RVA22V (RISC-V) CPUs")
@ -163,16 +164,11 @@ if(KOKKOS_ENABLE_COMPILER_WARNINGS)
endif() endif()
endif() endif()
# ICPC doesn't support -Wsuggest-override
if(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
list(REMOVE_ITEM COMMON_WARNINGS "-Wsuggest-override")
endif()
if(KOKKOS_CXX_COMPILER_ID STREQUAL Clang) if(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
list(APPEND COMMON_WARNINGS "-Wimplicit-fallthrough") list(APPEND COMMON_WARNINGS "-Wimplicit-fallthrough")
endif() endif()
set(GNU_WARNINGS "-Wempty-body" "-Wclobbered" "-Wignored-qualifiers" ${COMMON_WARNINGS}) set(GNU_WARNINGS "-Wempty-body" "-Wignored-qualifiers" ${COMMON_WARNINGS})
if(KOKKOS_CXX_COMPILER_ID STREQUAL GNU) if(KOKKOS_CXX_COMPILER_ID STREQUAL GNU)
list(APPEND GNU_WARNINGS "-Wimplicit-fallthrough") list(APPEND GNU_WARNINGS "-Wimplicit-fallthrough")
endif() endif()
@ -349,12 +345,27 @@ endif()
if(KOKKOS_ARCH_ARMV9_GRACE) if(KOKKOS_ARCH_ARMV9_GRACE)
set(KOKKOS_ARCH_ARM_NEON ON) set(KOKKOS_ARCH_ARM_NEON ON)
if(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC)
check_cxx_compiler_flag("-tp=grace" COMPILER_SUPPORTS_GRACE_AS_TARGET_PROCESSOR)
else()
check_cxx_compiler_flag("-mcpu=neoverse-n2" COMPILER_SUPPORTS_NEOVERSE_N2) check_cxx_compiler_flag("-mcpu=neoverse-n2" COMPILER_SUPPORTS_NEOVERSE_N2)
check_cxx_compiler_flag("-msve-vector-bits=128" COMPILER_SUPPORTS_SVE_VECTOR_BITS) check_cxx_compiler_flag("-msve-vector-bits=128" COMPILER_SUPPORTS_SVE_VECTOR_BITS)
if(COMPILER_SUPPORTS_NEOVERSE_N2 AND COMPILER_SUPPORTS_SVE_VECTOR_BITS) endif()
compiler_specific_flags(COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID DEFAULT -mcpu=neoverse-n2 -msve-vector-bits=128) if(COMPILER_SUPPORTS_NEOVERSE_N2 AND COMPILER_SUPPORTS_SVE_VECTOR_BITS OR COMPILER_SUPPORTS_GRACE_AS_TARGET_PROCESSOR)
compiler_specific_flags(
COMPILER_ID
KOKKOS_CXX_HOST_COMPILER_ID
NVHPC
-tp=grace
DEFAULT
-mcpu=neoverse-n2
-msve-vector-bits=128
)
else() else()
message(WARNING "Compiler does not support ARMv9 Grace architecture") message(SEND_ERROR "Your compiler does not appear to support the ARMv9 Grace architecture.
Please ensure you are using a compatible compiler and toolchain.
Alternatively, try configuring with -DKokkos_ARCH_NATIVE=ON to use the native architecture of your system."
)
endif() endif()
endif() endif()
@ -362,8 +373,6 @@ if(KOKKOS_ARCH_ZEN)
compiler_specific_flags( compiler_specific_flags(
COMPILER_ID COMPILER_ID
KOKKOS_CXX_HOST_COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Intel
-mavx2
MSVC MSVC
/arch:AVX2 /arch:AVX2
NVHPC NVHPC
@ -380,8 +389,6 @@ if(KOKKOS_ARCH_ZEN2)
compiler_specific_flags( compiler_specific_flags(
COMPILER_ID COMPILER_ID
KOKKOS_CXX_HOST_COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Intel
-mavx2
MSVC MSVC
/arch:AVX2 /arch:AVX2
NVHPC NVHPC
@ -398,12 +405,10 @@ if(KOKKOS_ARCH_ZEN3)
compiler_specific_flags( compiler_specific_flags(
COMPILER_ID COMPILER_ID
KOKKOS_CXX_HOST_COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Intel
-mavx2
MSVC MSVC
/arch:AVX2 /arch:AVX2
NVHPC NVHPC
-tp=zen2 -tp=zen3
DEFAULT DEFAULT
-march=znver3 -march=znver3
-mtune=znver3 -mtune=znver3
@ -412,6 +417,22 @@ if(KOKKOS_ARCH_ZEN3)
set(KOKKOS_ARCH_AVX2 ON) set(KOKKOS_ARCH_AVX2 ON)
endif() endif()
if(KOKKOS_ARCH_ZEN4)
compiler_specific_flags(
COMPILER_ID
KOKKOS_CXX_HOST_COMPILER_ID
MSVC
/arch:AVX512
NVHPC
-tp=zen4
DEFAULT
-march=znver4
-mtune=znver4
)
set(KOKKOS_ARCH_AMD_ZEN4 ON)
set(KOKKOS_ARCH_AVX512XEON ON)
endif()
if(KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX) if(KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX)
set(KOKKOS_ARCH_AVX ON) set(KOKKOS_ARCH_AVX ON)
compiler_specific_flags( compiler_specific_flags(
@ -419,8 +440,6 @@ if(KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX)
KOKKOS_CXX_HOST_COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Cray Cray
NO-VALUE-SPECIFIED NO-VALUE-SPECIFIED
Intel
-mavx
MSVC MSVC
/arch:AVX /arch:AVX
NVHPC NVHPC
@ -437,8 +456,6 @@ if(KOKKOS_ARCH_HSW)
KOKKOS_CXX_HOST_COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Cray Cray
NO-VALUE-SPECIFIED NO-VALUE-SPECIFIED
Intel
-xCORE-AVX2
MSVC MSVC
/arch:AVX2 /arch:AVX2
NVHPC NVHPC
@ -477,8 +494,6 @@ if(KOKKOS_ARCH_BDW)
KOKKOS_CXX_HOST_COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Cray Cray
NO-VALUE-SPECIFIED NO-VALUE-SPECIFIED
Intel
-xCORE-AVX2
MSVC MSVC
/arch:AVX2 /arch:AVX2
NVHPC NVHPC
@ -498,8 +513,6 @@ if(KOKKOS_ARCH_KNL)
KOKKOS_CXX_HOST_COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Cray Cray
NO-VALUE-SPECIFIED NO-VALUE-SPECIFIED
Intel
-xMIC-AVX512
MSVC MSVC
/arch:AVX512 /arch:AVX512
NVHPC NVHPC
@ -520,8 +533,6 @@ if(KOKKOS_ARCH_SKL)
KOKKOS_CXX_HOST_COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Cray Cray
NO-VALUE-SPECIFIED NO-VALUE-SPECIFIED
Intel
-xSKYLAKE
MSVC MSVC
/arch:AVX2 /arch:AVX2
NVHPC NVHPC
@ -539,8 +550,6 @@ if(KOKKOS_ARCH_SKX)
KOKKOS_CXX_HOST_COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID
Cray Cray
NO-VALUE-SPECIFIED NO-VALUE-SPECIFIED
Intel
-xCORE-AVX512
MSVC MSVC
/arch:AVX512 /arch:AVX512
NVHPC NVHPC
@ -1193,9 +1202,8 @@ if(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED AND NOT KOKKOS_IMPL_A
) )
else() else()
execute_process(COMMAND ${ROCM_ENUMERATOR} OUTPUT_VARIABLE GPU_ARCHS) execute_process(COMMAND ${ROCM_ENUMERATOR} OUTPUT_VARIABLE GPU_ARCHS)
string(LENGTH "${GPU_ARCHS}" len_str) # Exits early if no GPU was detected
# enumerator always output gfx000 as the first line if("${GPU_ARCHS}" STREQUAL "")
if(${len_str} LESS 8)
message(SEND_ERROR "HIP enabled but no AMD GPU architecture could be automatically detected. " message(SEND_ERROR "HIP enabled but no AMD GPU architecture could be automatically detected. "
"Please manually specify one AMD GPU architecture via -DKokkos_ARCH_{..}=ON'." "Please manually specify one AMD GPU architecture via -DKokkos_ARCH_{..}=ON'."
) )

View File

@ -163,7 +163,6 @@ if(CMAKE_CXX_STANDARD EQUAL 17)
set(KOKKOS_CLANG_CUDA_MINIMUM 10.0.0) set(KOKKOS_CLANG_CUDA_MINIMUM 10.0.0)
set(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0) set(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
set(KOKKOS_GCC_MINIMUM 8.2.0) set(KOKKOS_GCC_MINIMUM 8.2.0)
set(KOKKOS_INTEL_MINIMUM 19.0.5)
set(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2021.1.1) set(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2021.1.1)
set(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0) set(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0)
set(KOKKOS_NVCC_MINIMUM 11.0.0) set(KOKKOS_NVCC_MINIMUM 11.0.0)
@ -175,7 +174,6 @@ else()
set(KOKKOS_CLANG_CUDA_MINIMUM 14.0.0) set(KOKKOS_CLANG_CUDA_MINIMUM 14.0.0)
set(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0) set(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
set(KOKKOS_GCC_MINIMUM 10.1.0) set(KOKKOS_GCC_MINIMUM 10.1.0)
set(KOKKOS_INTEL_MINIMUM "not supported")
set(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2022.0.0) set(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2022.0.0)
set(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0) set(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0)
set(KOKKOS_NVCC_MINIMUM 12.0.0) set(KOKKOS_NVCC_MINIMUM 12.0.0)
@ -191,7 +189,7 @@ set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) ${KOKKO
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) ${KOKKOS_CLANG_CUDA_MINIMUM}") set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) ${KOKKOS_CLANG_CUDA_MINIMUM}")
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) ${KOKKOS_CLANG_OPENMPTARGET_MINIMUM}") set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) ${KOKKOS_CLANG_OPENMPTARGET_MINIMUM}")
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC ${KOKKOS_GCC_MINIMUM}") set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC ${KOKKOS_GCC_MINIMUM}")
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel ${KOKKOS_INTEL_MINIMUM}") set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel not supported")
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) ${KOKKOS_INTEL_LLVM_CPU_MINIMUM}") set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) ${KOKKOS_INTEL_LLVM_CPU_MINIMUM}")
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM}") set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM}")
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC ${KOKKOS_NVCC_MINIMUM}") set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC ${KOKKOS_NVCC_MINIMUM}")
@ -214,9 +212,7 @@ elseif(KOKKOS_CXX_COMPILER_ID STREQUAL GNU)
message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
endif() endif()
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
if((NOT CMAKE_CXX_STANDARD EQUAL 17) OR (KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_MINIMUM}))
message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
endif()
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND NOT Kokkos_ENABLE_SYCL) elseif(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND NOT Kokkos_ENABLE_SYCL)
if(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_CPU_MINIMUM}) if(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_CPU_MINIMUM})
message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")

View File

@ -76,7 +76,7 @@ kokkos_enable_option(
HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF
"Whether multiple kernels are instantiated at compile time - improve performance but increase compile time" "Whether multiple kernels are instantiated at compile time - improve performance but increase compile time"
) )
kokkos_enable_option(IMPL_HIP_MALLOC_ASYNC OFF "Whether to enable hipMallocAsync") kokkos_enable_option(IMPL_HIP_MALLOC_ASYNC ${KOKKOS_ENABLE_HIP} "Whether to enable hipMallocAsync")
kokkos_enable_option(OPENACC_FORCE_HOST_AS_DEVICE OFF "Whether to force to use host as a target device for OpenACC") kokkos_enable_option(OPENACC_FORCE_HOST_AS_DEVICE OFF "Whether to force to use host as a target device for OpenACC")
# This option will go away eventually, but allows fallback to old implementation when needed. # This option will go away eventually, but allows fallback to old implementation when needed.

View File

@ -799,7 +799,6 @@ function(COMPILER_SPECIFIC_OPTIONS_HELPER)
NVHPC NVHPC
DEFAULT DEFAULT
Cray Cray
Intel
Clang Clang
AppleClang AppleClang
IntelLLVM IntelLLVM

View File

@ -155,9 +155,6 @@ if(NOT KOKKOS_CXX_STANDARD_FEATURE)
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) elseif(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
include(${KOKKOS_SRC_PATH}/cmake/pgi.cmake) include(${KOKKOS_SRC_PATH}/cmake/pgi.cmake)
kokkos_set_pgi_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD}) kokkos_set_pgi_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
include(${KOKKOS_SRC_PATH}/cmake/intel.cmake)
kokkos_set_intel_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
elseif((KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") OR ((KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA") AND WIN32)) elseif((KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") OR ((KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA") AND WIN32))
include(${KOKKOS_SRC_PATH}/cmake/msvc.cmake) include(${KOKKOS_SRC_PATH}/cmake/msvc.cmake)
kokkos_set_msvc_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD}) kokkos_set_msvc_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})

View File

@ -106,7 +106,6 @@ function(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME)
OR Kokkos_ENABLE_SYCL OR Kokkos_ENABLE_SYCL
OR Kokkos_ENABLE_HPX OR Kokkos_ENABLE_HPX
OR Kokkos_ENABLE_IMPL_SKIP_NO_RTTI_FLAG OR Kokkos_ENABLE_IMPL_SKIP_NO_RTTI_FLAG
OR (KOKKOS_CXX_COMPILER_ID STREQUAL "Intel" AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2021.2.0)
OR (KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA" AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11.3.0) OR (KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA" AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11.3.0)
OR (KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA" AND KOKKOS_CXX_HOST_COMPILER_ID STREQUAL "MSVC")) OR (KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA" AND KOKKOS_CXX_HOST_COMPILER_ID STREQUAL "MSVC"))
) )

View File

@ -18,6 +18,8 @@ LINK ?= $(CXX)
LDFLAGS ?= LDFLAGS ?=
override LDFLAGS += -lpthread override LDFLAGS += -lpthread
KOKKOS_USE_DEPRECATED_MAKEFILES=1
include $(KOKKOS_PATH)/Makefile.kokkos include $(KOKKOS_PATH)/Makefile.kokkos
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests

View File

@ -22,6 +22,7 @@
#endif #endif
#include <Kokkos_Core.hpp> #include <Kokkos_Core.hpp>
#include <Kokkos_BitManipulation.hpp>
#include <Kokkos_Functional.hpp> #include <Kokkos_Functional.hpp>
#include <impl/Kokkos_Bitset_impl.hpp> #include <impl/Kokkos_Bitset_impl.hpp>
@ -62,13 +63,11 @@ class Bitset {
BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD; BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD;
private: private:
enum : unsigned { static constexpr unsigned block_size = sizeof(unsigned) * CHAR_BIT;
block_size = static_cast<unsigned>(sizeof(unsigned) * CHAR_BIT) static constexpr unsigned block_mask = block_size - 1u;
}; static constexpr unsigned block_shift =
enum : unsigned { block_mask = block_size - 1u }; Kokkos::has_single_bit(block_size) ? Kokkos::bit_width(block_size) - 1
enum : unsigned { : ~0u;
block_shift = Kokkos::Impl::integral_power_of_two(block_size)
};
//! Type of @ref m_blocks. //! Type of @ref m_blocks.
using block_view_type = View<unsigned*, Device, MemoryTraits<RandomAccess>>; using block_view_type = View<unsigned*, Device, MemoryTraits<RandomAccess>>;
@ -135,9 +134,9 @@ class Bitset {
if (m_last_block_mask) { if (m_last_block_mask) {
// clear the unused bits in the last block // clear the unused bits in the last block
Kokkos::Impl::DeepCopy<typename Device::memory_space, Kokkos::HostSpace>( auto last_block = Kokkos::subview(m_blocks, m_blocks.extent(0) - 1u);
m_blocks.data() + (m_blocks.extent(0) - 1u), &m_last_block_mask, Kokkos::deep_copy(typename Device::execution_space{}, last_block,
sizeof(unsigned)); m_last_block_mask);
Kokkos::fence( Kokkos::fence(
"Bitset::set: fence after clearing unused bits copying from " "Bitset::set: fence after clearing unused bits copying from "
"HostSpace"); "HostSpace");
@ -324,9 +323,11 @@ class ConstBitset {
using block_view_type = typename Bitset<Device>::block_view_type::const_type; using block_view_type = typename Bitset<Device>::block_view_type::const_type;
private: private:
enum { block_size = static_cast<unsigned>(sizeof(unsigned) * CHAR_BIT) }; static constexpr unsigned block_size = sizeof(unsigned) * CHAR_BIT;
enum { block_mask = block_size - 1u }; static constexpr unsigned block_mask = block_size - 1u;
enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) }; static constexpr unsigned block_shift =
Kokkos::has_single_bit(block_size) ? Kokkos::bit_width(block_size) - 1
: ~0u;
public: public:
KOKKOS_FUNCTION KOKKOS_FUNCTION
@ -400,13 +401,7 @@ void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src) {
Kokkos::Impl::throw_runtime_exception( Kokkos::Impl::throw_runtime_exception(
"Error: Cannot deep_copy bitsets of different sizes!"); "Error: Cannot deep_copy bitsets of different sizes!");
} }
Kokkos::deep_copy(dst.m_blocks, src.m_blocks);
Kokkos::fence("Bitset::deep_copy: fence before copy operation");
Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
typename SrcDevice::memory_space>(
dst.m_blocks.data(), src.m_blocks.data(),
sizeof(unsigned) * src.m_blocks.extent(0));
Kokkos::fence("Bitset::deep_copy: fence after copy operation");
} }
template <typename DstDevice, typename SrcDevice> template <typename DstDevice, typename SrcDevice>
@ -415,13 +410,7 @@ void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
Kokkos::Impl::throw_runtime_exception( Kokkos::Impl::throw_runtime_exception(
"Error: Cannot deep_copy bitsets of different sizes!"); "Error: Cannot deep_copy bitsets of different sizes!");
} }
Kokkos::deep_copy(dst.m_blocks, src.m_blocks);
Kokkos::fence("Bitset::deep_copy: fence before copy operation");
Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
typename SrcDevice::memory_space>(
dst.m_blocks.data(), src.m_blocks.data(),
sizeof(unsigned) * src.m_blocks.extent(0));
Kokkos::fence("Bitset::deep_copy: fence after copy operation");
} }
template <typename DstDevice, typename SrcDevice> template <typename DstDevice, typename SrcDevice>
@ -430,13 +419,7 @@ void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
Kokkos::Impl::throw_runtime_exception( Kokkos::Impl::throw_runtime_exception(
"Error: Cannot deep_copy bitsets of different sizes!"); "Error: Cannot deep_copy bitsets of different sizes!");
} }
Kokkos::deep_copy(dst.m_blocks, src.m_blocks);
Kokkos::fence("Bitset::deep_copy: fence before copy operation");
Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
typename SrcDevice::memory_space>(
dst.m_blocks.data(), src.m_blocks.data(),
sizeof(unsigned) * src.m_blocks.extent(0));
Kokkos::fence("Bitset::deep_copy: fence after copy operation");
} }
} // namespace Kokkos } // namespace Kokkos

View File

@ -211,6 +211,12 @@ class DualView : public ViewTraits<DataType, Properties...> {
public: public:
//@} //@}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
public:
#else
private:
#endif
// Moved this specifically after modified_flags to resolve an alignment issue // Moved this specifically after modified_flags to resolve an alignment issue
// on MSVC/NVCC // on MSVC/NVCC
//! \name The two View instances. //! \name The two View instances.
@ -219,6 +225,7 @@ class DualView : public ViewTraits<DataType, Properties...> {
t_host h_view; t_host h_view;
//@} //@}
public:
//! \name Constructors //! \name Constructors
//@{ //@{
@ -456,16 +463,21 @@ class DualView : public ViewTraits<DataType, Properties...> {
} }
} }
} }
#ifdef KOKKOS_COMPILER_INTEL
__builtin_unreachable();
#endif
} }
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
t_host view_host() const { return h_view; } t_host view_host() const { return h_view; }
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
t_dev view_device() const { return d_view; } t_dev view_device() const { return d_view; }
#else
KOKKOS_INLINE_FUNCTION
const t_host& view_host() const { return h_view; }
KOKKOS_INLINE_FUNCTION
const t_dev& view_device() const { return d_view; }
#endif
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const { KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
return (d_view.is_allocated() && h_view.is_allocated()); return (d_view.is_allocated() && h_view.is_allocated());
@ -615,8 +627,8 @@ class DualView : public ViewTraits<DataType, Properties...> {
impl_report_host_sync(); impl_report_host_sync();
} }
} }
if constexpr (std::is_same<typename t_host::memory_space, if constexpr (std::is_same_v<typename t_host::memory_space,
typename t_dev::memory_space>::value) { typename t_dev::memory_space>) {
typename t_dev::execution_space().fence( typename t_dev::execution_space().fence(
"Kokkos::DualView<>::sync: fence after syncing DualView"); "Kokkos::DualView<>::sync: fence after syncing DualView");
typename t_host::execution_space().fence( typename t_host::execution_space().fence(
@ -687,8 +699,8 @@ class DualView : public ViewTraits<DataType, Properties...> {
// deliberately passing args by cref as they're used multiple times // deliberately passing args by cref as they're used multiple times
template <typename... Args> template <typename... Args>
void sync_host_impl(Args const&... args) { void sync_host_impl(Args const&... args) {
if (!std::is_same<typename traits::data_type, if (!std::is_same_v<typename traits::data_type,
typename traits::non_const_data_type>::value) typename traits::non_const_data_type>)
Impl::throw_runtime_exception( Impl::throw_runtime_exception(
"Calling sync_host on a DualView with a const datatype."); "Calling sync_host on a DualView with a const datatype.");
if (modified_flags.data() == nullptr) return; if (modified_flags.data() == nullptr) return;
@ -718,8 +730,8 @@ class DualView : public ViewTraits<DataType, Properties...> {
// deliberately passing args by cref as they're used multiple times // deliberately passing args by cref as they're used multiple times
template <typename... Args> template <typename... Args>
void sync_device_impl(Args const&... args) { void sync_device_impl(Args const&... args) {
if (!std::is_same<typename traits::data_type, if (!std::is_same_v<typename traits::data_type,
typename traits::non_const_data_type>::value) typename traits::non_const_data_type>)
Impl::throw_runtime_exception( Impl::throw_runtime_exception(
"Calling sync_device on a DualView with a const datatype."); "Calling sync_device on a DualView with a const datatype.");
if (modified_flags.data() == nullptr) return; if (modified_flags.data() == nullptr) return;
@ -1264,10 +1276,10 @@ namespace Kokkos {
template <class DT, class... DP, class ST, class... SP> template <class DT, class... DP, class ST, class... SP>
void deep_copy(DualView<DT, DP...>& dst, const DualView<ST, SP...>& src) { void deep_copy(DualView<DT, DP...>& dst, const DualView<ST, SP...>& src) {
if (src.need_sync_device()) { if (src.need_sync_device()) {
deep_copy(dst.h_view, src.h_view); deep_copy(dst.view_host(), src.view_host());
dst.modify_host(); dst.modify_host();
} else { } else {
deep_copy(dst.d_view, src.d_view); deep_copy(dst.view_device(), src.view_device());
dst.modify_device(); dst.modify_device();
} }
} }
@ -1276,10 +1288,10 @@ template <class ExecutionSpace, class DT, class... DP, class ST, class... SP>
void deep_copy(const ExecutionSpace& exec, DualView<DT, DP...>& dst, void deep_copy(const ExecutionSpace& exec, DualView<DT, DP...>& dst,
const DualView<ST, SP...>& src) { const DualView<ST, SP...>& src) {
if (src.need_sync_device()) { if (src.need_sync_device()) {
deep_copy(exec, dst.h_view, src.h_view); deep_copy(exec, dst.view_host(), src.view_host());
dst.modify_host(); dst.modify_host();
} else { } else {
deep_copy(exec, dst.d_view, src.d_view); deep_copy(exec, dst.view_device(), src.view_device());
dst.modify_device(); dst.modify_device();
} }
} }

View File

@ -626,9 +626,8 @@ class DynRankView : private View<DataType*******, Properties...> {
} else } else
#endif #endif
return view_type::operator()(i0, 0, 0, 0, 0, 0, 0); return view_type::operator()(i0, 0, 0, 0, 0, 0, 0);
#if defined KOKKOS_COMPILER_INTEL || \ #if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ !defined(KOKKOS_COMPILER_MSVC)
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable(); __builtin_unreachable();
#endif #endif
} }
@ -656,9 +655,8 @@ class DynRankView : private View<DataType*******, Properties...> {
} else } else
#endif #endif
return view_type::operator()(i0, i1, 0, 0, 0, 0, 0); return view_type::operator()(i0, i1, 0, 0, 0, 0, 0);
#if defined KOKKOS_COMPILER_INTEL || \ #if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ !defined(KOKKOS_COMPILER_MSVC)
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable(); __builtin_unreachable();
#endif #endif
} }
@ -690,9 +688,8 @@ class DynRankView : private View<DataType*******, Properties...> {
} else } else
#endif #endif
return view_type::operator()(i0, i1, i2, 0, 0, 0, 0); return view_type::operator()(i0, i1, i2, 0, 0, 0, 0);
#if defined KOKKOS_COMPILER_INTEL || \ #if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ !defined(KOKKOS_COMPILER_MSVC)
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable(); __builtin_unreachable();
#endif #endif
} }
@ -1124,57 +1121,6 @@ KOKKOS_INLINE_FUNCTION bool operator!=(const DynRankView<LT, LP...>& lhs,
namespace Kokkos { namespace Kokkos {
namespace Impl { namespace Impl {
template <class OutputView, class Enable = void>
struct DynRankViewFill {
using const_value_type = typename OutputView::traits::const_value_type;
const OutputView output;
const_value_type input;
KOKKOS_INLINE_FUNCTION
void operator()(const size_t i0) const {
const size_t n1 = output.extent(1);
const size_t n2 = output.extent(2);
const size_t n3 = output.extent(3);
const size_t n4 = output.extent(4);
const size_t n5 = output.extent(5);
const size_t n6 = output.extent(6);
for (size_t i1 = 0; i1 < n1; ++i1) {
for (size_t i2 = 0; i2 < n2; ++i2) {
for (size_t i3 = 0; i3 < n3; ++i3) {
for (size_t i4 = 0; i4 < n4; ++i4) {
for (size_t i5 = 0; i5 < n5; ++i5) {
for (size_t i6 = 0; i6 < n6; ++i6) {
output.access(i0, i1, i2, i3, i4, i5, i6) = input;
}
}
}
}
}
}
}
DynRankViewFill(const OutputView& arg_out, const_value_type& arg_in)
: output(arg_out), input(arg_in) {
using execution_space = typename OutputView::execution_space;
using Policy = Kokkos::RangePolicy<execution_space>;
Kokkos::parallel_for("Kokkos::DynRankViewFill", Policy(0, output.extent(0)),
*this);
}
};
template <class OutputView>
struct DynRankViewFill<OutputView, std::enable_if_t<OutputView::rank == 0>> {
DynRankViewFill(const OutputView& dst,
const typename OutputView::const_value_type& src) {
Kokkos::Impl::DeepCopy<typename OutputView::memory_space,
Kokkos::HostSpace>(
dst.data(), &src, sizeof(typename OutputView::const_value_type));
}
};
template <class OutputView, class InputView, template <class OutputView, class InputView,
class ExecSpace = typename OutputView::execution_space> class ExecSpace = typename OutputView::execution_space>
struct DynRankViewRemap { struct DynRankViewRemap {
@ -1521,9 +1467,8 @@ inline auto create_mirror(const DynRankView<T, P...>& src,
return dst_type(prop_copy, return dst_type(prop_copy,
Impl::reconstructLayout(src.layout(), src.rank())); Impl::reconstructLayout(src.layout(), src.rank()));
} }
#if defined(KOKKOS_COMPILER_INTEL) || \ #if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ !defined(KOKKOS_COMPILER_MSVC)
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable(); __builtin_unreachable();
#endif #endif
} }
@ -1611,9 +1556,8 @@ inline auto create_mirror_view(
return Kokkos::Impl::choose_create_mirror(src, arg_prop); return Kokkos::Impl::choose_create_mirror(src, arg_prop);
} }
} }
#if defined(KOKKOS_COMPILER_INTEL) || \ #if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ !defined(KOKKOS_COMPILER_MSVC)
!defined(KOKKOS_COMPILER_MSVC))
__builtin_unreachable(); __builtin_unreachable();
#endif #endif
} }
@ -1754,6 +1698,7 @@ inline void impl_resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>( Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(
Impl::get_property<Impl::ExecutionSpaceTag>(prop_copy), v_resized, v); Impl::get_property<Impl::ExecutionSpaceTag>(prop_copy), v_resized, v);
else { else {
// NOLINTNEXTLINE(bugprone-unused-raii)
Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(v_resized, v); Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(v_resized, v);
Kokkos::fence("Kokkos::resize(DynRankView)"); Kokkos::fence("Kokkos::resize(DynRankView)");
} }

Some files were not shown because too many files have changed in this diff Show More