Merge branch 'develop' into next_release
This commit is contained in:
1
.github/CODEOWNERS
vendored
1
.github/CODEOWNERS
vendored
@ -71,6 +71,7 @@ src/EXTRA-COMMAND/group_ndx.* @akohlmey
|
|||||||
src/EXTRA-COMMAND/ndx_group.* @akohlmey
|
src/EXTRA-COMMAND/ndx_group.* @akohlmey
|
||||||
src/EXTRA-COMPUTE/compute_stress_mop*.* @RomainVermorel
|
src/EXTRA-COMPUTE/compute_stress_mop*.* @RomainVermorel
|
||||||
src/EXTRA-COMPUTE/compute_born_matrix.* @Bibobu @athomps
|
src/EXTRA-COMPUTE/compute_born_matrix.* @Bibobu @athomps
|
||||||
|
src/EXTRA-DUMP/dump_extxyz.* @fxcoudert
|
||||||
src/EXTRA-FIX/fix_deform_pressure.* @jtclemm
|
src/EXTRA-FIX/fix_deform_pressure.* @jtclemm
|
||||||
src/EXTRA-PAIR/pair_dispersion_d3.* @soniasolomoni @arthurfl
|
src/EXTRA-PAIR/pair_dispersion_d3.* @soniasolomoni @arthurfl
|
||||||
src/EXTRA-PAIR/d3_parameters.h @soniasolomoni @arthurfl
|
src/EXTRA-PAIR/d3_parameters.h @soniasolomoni @arthurfl
|
||||||
|
|||||||
@ -57,8 +57,8 @@ if(DOWNLOAD_KOKKOS)
|
|||||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
|
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
|
||||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
|
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
|
||||||
include(ExternalProject)
|
include(ExternalProject)
|
||||||
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.5.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.6.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
||||||
set(KOKKOS_MD5 "4d832aa0284169d9e3fbae3165286bc6" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
set(KOKKOS_MD5 "61b2b69ae50d83eedcc7d47a3fa3d6cb" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
||||||
mark_as_advanced(KOKKOS_URL)
|
mark_as_advanced(KOKKOS_URL)
|
||||||
mark_as_advanced(KOKKOS_MD5)
|
mark_as_advanced(KOKKOS_MD5)
|
||||||
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
|
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
|
||||||
@ -83,7 +83,7 @@ if(DOWNLOAD_KOKKOS)
|
|||||||
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
|
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
|
||||||
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
|
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
|
||||||
elseif(EXTERNAL_KOKKOS)
|
elseif(EXTERNAL_KOKKOS)
|
||||||
find_package(Kokkos 4.5.01 REQUIRED CONFIG)
|
find_package(Kokkos 4.6.00 REQUIRED CONFIG)
|
||||||
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
|
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
|
||||||
else()
|
else()
|
||||||
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
|
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
|
||||||
|
|||||||
@ -611,6 +611,9 @@ They must be specified in uppercase.
|
|||||||
* - ZEN3
|
* - ZEN3
|
||||||
- HOST
|
- HOST
|
||||||
- AMD Zen3 architecture
|
- AMD Zen3 architecture
|
||||||
|
* - ZEN4
|
||||||
|
- HOST
|
||||||
|
- AMD Zen4 architecture
|
||||||
* - RISCV_SG2042
|
* - RISCV_SG2042
|
||||||
- HOST
|
- HOST
|
||||||
- SG2042 (RISC-V) CPUs
|
- SG2042 (RISC-V) CPUs
|
||||||
@ -714,7 +717,7 @@ They must be specified in uppercase.
|
|||||||
- GPU
|
- GPU
|
||||||
- Intel GPU Ponte Vecchio
|
- Intel GPU Ponte Vecchio
|
||||||
|
|
||||||
This list was last updated for version 4.5.1 of the Kokkos library.
|
This list was last updated for version 4.6.0 of the Kokkos library.
|
||||||
|
|
||||||
.. tabs::
|
.. tabs::
|
||||||
|
|
||||||
|
|||||||
@ -128,7 +128,7 @@ OPT.
|
|||||||
* :doc:`harmonic (iko) <dihedral_harmonic>`
|
* :doc:`harmonic (iko) <dihedral_harmonic>`
|
||||||
* :doc:`helix (o) <dihedral_helix>`
|
* :doc:`helix (o) <dihedral_helix>`
|
||||||
* :doc:`lepton (o) <dihedral_lepton>`
|
* :doc:`lepton (o) <dihedral_lepton>`
|
||||||
* :doc:`multi/harmonic (o) <dihedral_multi_harmonic>`
|
* :doc:`multi/harmonic (ko) <dihedral_multi_harmonic>`
|
||||||
* :doc:`nharmonic (o) <dihedral_nharmonic>`
|
* :doc:`nharmonic (o) <dihedral_nharmonic>`
|
||||||
* :doc:`opls (iko) <dihedral_opls>`
|
* :doc:`opls (iko) <dihedral_opls>`
|
||||||
* :doc:`quadratic (o) <dihedral_quadratic>`
|
* :doc:`quadratic (o) <dihedral_quadratic>`
|
||||||
|
|||||||
@ -19,6 +19,7 @@ An alphabetic list of all LAMMPS :doc:`dump <dump>` commands.
|
|||||||
* :doc:`custom/gz <dump>`
|
* :doc:`custom/gz <dump>`
|
||||||
* :doc:`custom/zstd <dump>`
|
* :doc:`custom/zstd <dump>`
|
||||||
* :doc:`dcd <dump>`
|
* :doc:`dcd <dump>`
|
||||||
|
* :doc:`extxyz <dump>`
|
||||||
* :doc:`grid <dump>`
|
* :doc:`grid <dump>`
|
||||||
* :doc:`grid/vtk <dump>`
|
* :doc:`grid/vtk <dump>`
|
||||||
* :doc:`h5md <dump_h5md>`
|
* :doc:`h5md <dump_h5md>`
|
||||||
|
|||||||
@ -186,6 +186,7 @@ OPT.
|
|||||||
* :doc:`qeq/fire <fix_qeq>`
|
* :doc:`qeq/fire <fix_qeq>`
|
||||||
* :doc:`qeq/point <fix_qeq>`
|
* :doc:`qeq/point <fix_qeq>`
|
||||||
* :doc:`qeq/reaxff (ko) <fix_qeq_reaxff>`
|
* :doc:`qeq/reaxff (ko) <fix_qeq_reaxff>`
|
||||||
|
* :doc:`qeq/rel/reaxff <fix_qeq_rel_reaxff>`
|
||||||
* :doc:`qeq/shielded <fix_qeq>`
|
* :doc:`qeq/shielded <fix_qeq>`
|
||||||
* :doc:`qeq/slater <fix_qeq>`
|
* :doc:`qeq/slater <fix_qeq>`
|
||||||
* :doc:`qmmm <fix_qmmm>`
|
* :doc:`qmmm <fix_qmmm>`
|
||||||
|
|||||||
@ -4,7 +4,7 @@
|
|||||||
dihedral_style multi/harmonic command
|
dihedral_style multi/harmonic command
|
||||||
=====================================
|
=====================================
|
||||||
|
|
||||||
Accelerator Variants: *multi/harmonic/omp*
|
Accelerator Variants: *multi/harmonic/kk*, *multi/harmonic/omp*
|
||||||
|
|
||||||
Syntax
|
Syntax
|
||||||
""""""
|
""""""
|
||||||
|
|||||||
@ -3,6 +3,7 @@
|
|||||||
.. index:: dump cfg
|
.. index:: dump cfg
|
||||||
.. index:: dump custom
|
.. index:: dump custom
|
||||||
.. index:: dump dcd
|
.. index:: dump dcd
|
||||||
|
.. index:: dump extxyz
|
||||||
.. index:: dump grid
|
.. index:: dump grid
|
||||||
.. index:: dump grid/vtk
|
.. index:: dump grid/vtk
|
||||||
.. index:: dump local
|
.. index:: dump local
|
||||||
@ -59,7 +60,7 @@ Syntax
|
|||||||
|
|
||||||
* ID = user-assigned name for the dump
|
* ID = user-assigned name for the dump
|
||||||
* group-ID = ID of the group of atoms to be dumped
|
* group-ID = ID of the group of atoms to be dumped
|
||||||
* style = *atom* or *atom/adios* or *atom/gz* or *atom/zstd* or *cfg* or *cfg/gz* or *cfg/zstd* or *cfg/uef* or *custom* or *custom/gz* or *custom/zstd* or *custom/adios* or *dcd* or *grid* or *grid/vtk* or *h5md* or *image* or *local* or *local/gz* or *local/zstd* or *molfile* or *movie* or *netcdf* or *netcdf/mpiio* or *vtk* or *xtc* or *xyz* or *xyz/gz* or *xyz/zstd* or *yaml*
|
* style = *atom* or *atom/adios* or *atom/gz* or *atom/zstd* or *cfg* or *cfg/gz* or *cfg/zstd* or *cfg/uef* or *custom* or *custom/gz* or *custom/zstd* or *custom/adios* or *dcd* or *extxyz* or *grid* or *grid/vtk* or *h5md* or *image* or *local* or *local/gz* or *local/zstd* or *molfile* or *movie* or *netcdf* or *netcdf/mpiio* or *vtk* or *xtc* or *xyz* or *xyz/gz* or *xyz/zstd* or *yaml*
|
||||||
* N = dump on timesteps which are multiples of N
|
* N = dump on timesteps which are multiples of N
|
||||||
* file = name of file to write dump info to
|
* file = name of file to write dump info to
|
||||||
* attribute1,attribute2,... = list of attributes for a particular style
|
* attribute1,attribute2,... = list of attributes for a particular style
|
||||||
@ -77,6 +78,7 @@ Syntax
|
|||||||
*custom*, *custom/gz*, *custom/zstd* attributes = see below
|
*custom*, *custom/gz*, *custom/zstd* attributes = see below
|
||||||
*custom/adios* attributes = same as *custom* attributes, discussed on :doc:`dump custom/adios <dump_adios>` page
|
*custom/adios* attributes = same as *custom* attributes, discussed on :doc:`dump custom/adios <dump_adios>` page
|
||||||
*dcd* attributes = none
|
*dcd* attributes = none
|
||||||
|
*extxyz* attributes = none
|
||||||
*h5md* attributes = discussed on :doc:`dump h5md <dump_h5md>` page
|
*h5md* attributes = discussed on :doc:`dump h5md <dump_h5md>` page
|
||||||
*grid* attributes = see below
|
*grid* attributes = see below
|
||||||
*grid/vtk* attributes = see below
|
*grid/vtk* attributes = see below
|
||||||
@ -242,28 +244,29 @@ all the processors or multiple smaller files.
|
|||||||
frames consistently to the same atom. This can lead to incorrect
|
frames consistently to the same atom. This can lead to incorrect
|
||||||
visualizations or results. LAMMPS will print a warning in such cases.
|
visualizations or results. LAMMPS will print a warning in such cases.
|
||||||
|
|
||||||
For the *atom*, *custom*, *cfg*, *grid*, and *local* styles, sorting
|
For the *atom*, *custom*, *cfg*, *grid*, and *local* styles, sorting is
|
||||||
is off by default. For the *dcd*, *grid/vtk*, *xtc*, *xyz*, and
|
off by default. For the *dcd*, *extxyz*, *grid/vtk*, *xtc*, *xyz*, and
|
||||||
*molfile* styles, sorting by atom ID or grid ID is on by default. See
|
*molfile* styles, sorting by atom ID or grid ID is on by default. See
|
||||||
the :doc:`dump_modify <dump_modify>` page for details.
|
the :doc:`dump_modify <dump_modify>` page for details.
|
||||||
|
|
||||||
The *style* keyword determines what kind of data is written to the
|
The *style* keyword determines what kind of data is written to the
|
||||||
dump file(s) and in what format.
|
dump file(s) and in what format.
|
||||||
|
|
||||||
Note that *atom*, *custom*, *dcd*, *xtc*, *xyz*, and *yaml* style dump
|
Note that *atom*, *custom*, *dcd*, *extxyz*, *xtc*, *xyz*, and *yaml*
|
||||||
files can be read directly by `VMD <https://www.ks.uiuc.edu/Research/vmd>`_,
|
style dump files can be read directly by `VMD
|
||||||
a popular tool for visualizing and analyzing trajectories from atomic
|
<https://www.ks.uiuc.edu/Research/vmd>`_, a popular tool for visualizing
|
||||||
and molecular systems. For reading *netcdf* style dump files, the
|
and analyzing trajectories from atomic and molecular systems. For
|
||||||
netcdf plugin needs to be recompiled from source using a NetCDF version
|
reading *netcdf* style dump files, the netcdf plugin needs to be
|
||||||
compatible with the one used by LAMMPS. The bundled plugin binary
|
recompiled from source using a NetCDF version compatible with the one
|
||||||
uses a very old version of NetCDF that is not compatible with LAMMPS.
|
used by LAMMPS. The bundled plugin binary uses a very old version of
|
||||||
|
NetCDF that is not compatible with LAMMPS.
|
||||||
|
|
||||||
Likewise the `OVITO visualization package <https://www.ovito.org>`_,
|
Likewise the `OVITO visualization package <https://www.ovito.org>`_,
|
||||||
popular for materials modeling, can read the *atom*, *custom*,
|
popular for materials modeling, can read the *atom*, *custom*, *extxyz*,
|
||||||
*local*, *xtc*, *cfg*, *netcdf*, and *xyz* style atom dump files
|
*local*, *xtc*, *cfg*, *netcdf*, and *xyz* style atom dump files
|
||||||
directly. With version 3.8 and above, OVITO can also read and
|
directly. With version 3.8 and above, OVITO can also read and visualize
|
||||||
visualize *grid* style dump files with grid cell data, including
|
*grid* style dump files with grid cell data, including iso-surface
|
||||||
iso-surface images of the grid cell values.
|
images of the grid cell values.
|
||||||
|
|
||||||
Note that settings made via the :doc:`dump_modify <dump_modify>`
|
Note that settings made via the :doc:`dump_modify <dump_modify>`
|
||||||
command can also alter the format of individual values and content of
|
command can also alter the format of individual values and content of
|
||||||
@ -475,6 +478,24 @@ label). This option will help many visualization programs to guess bonds
|
|||||||
and colors. You can use the :doc:`dump_modify types labels <dump_modify>`
|
and colors. You can use the :doc:`dump_modify types labels <dump_modify>`
|
||||||
option to replace numeric atom types with :doc:`type labels <Howto_type_labels>`.
|
option to replace numeric atom types with :doc:`type labels <Howto_type_labels>`.
|
||||||
|
|
||||||
|
.. versionadded:: TBD
|
||||||
|
|
||||||
|
The *extxyz* style writes XYZ files compatible with the Extended XYZ (or
|
||||||
|
ExtXYZ) format as defined in `the libAtoms specification
|
||||||
|
<https://github.com/libAtoms/extxyz>`_. Specifically, the following
|
||||||
|
information will be dumped:
|
||||||
|
|
||||||
|
* timestep
|
||||||
|
* time, which can be disabled with :doc:`dump_modify time no <dump_modify>`
|
||||||
|
* simulation box lattice and pbc conditions
|
||||||
|
* atomic forces, which can be disabled with :doc:`dump_modify forces no <dump_modify>`
|
||||||
|
* atomic velocities, which can be disabled with :doc:`dump_modify vel no <dump_modify>`
|
||||||
|
* atomic masses, if enabled with :doc:`dump_modify mass yes <dump_modify>`
|
||||||
|
|
||||||
|
Dump style *extxyz* requires either that a :doc:`type label map for atom types
|
||||||
|
<labelmap>` is defined or :doc:`dump_modify element <dump_modify>` is used to
|
||||||
|
set up an atom type number to atom name mapping.
|
||||||
|
|
||||||
.. versionadded:: 22Dec2022
|
.. versionadded:: 22Dec2022
|
||||||
|
|
||||||
The *grid/vtk* style writes VTK files for grid data on a regular
|
The *grid/vtk* style writes VTK files for grid data on a regular
|
||||||
@ -607,8 +628,8 @@ with the processor ID from :math:`0` to :math:`P-1`. For example,
|
|||||||
tmp.dump.% becomes tmp.dump.0, tmp.dump.1, ... tmp.dump.:math:`P-1`,
|
tmp.dump.% becomes tmp.dump.0, tmp.dump.1, ... tmp.dump.:math:`P-1`,
|
||||||
etc. This creates smaller files and can be a fast mode of output on
|
etc. This creates smaller files and can be a fast mode of output on
|
||||||
parallel machines that support parallel I/O for output. This option is
|
parallel machines that support parallel I/O for output. This option is
|
||||||
**not** available for the *dcd*, *xtc*, *xyz*, *grid/vtk*, and *yaml*
|
**not** available for the *dcd*, *extxyz*, *xtc*, *xyz*, *grid/vtk*, and
|
||||||
styles.
|
*yaml* styles.
|
||||||
|
|
||||||
By default, :math:`P` is the number of processors, meaning one file per
|
By default, :math:`P` is the number of processors, meaning one file per
|
||||||
processor, but :math:`P` can be set to a smaller value via the *nfile* or
|
processor, but :math:`P` can be set to a smaller value via the *nfile* or
|
||||||
@ -1017,9 +1038,9 @@ the COMPRESS package. They are only enabled if LAMMPS was built with
|
|||||||
that package. See the :doc:`Build package <Build_package>` page for
|
that package. See the :doc:`Build package <Build_package>` page for
|
||||||
more info.
|
more info.
|
||||||
|
|
||||||
The *xtc*, *dcd*, and *yaml* styles are part of the EXTRA-DUMP package.
|
The *dcd*, *extxyz*, *xtc*, and *yaml* styles are part of the EXTRA-DUMP
|
||||||
They are only enabled if LAMMPS was built with that package. See the
|
package. They are only enabled if LAMMPS was built with that package.
|
||||||
:doc:`Build package <Build_package>` page for more info.
|
See the :doc:`Build package <Build_package>` page for more info.
|
||||||
|
|
||||||
Related commands
|
Related commands
|
||||||
""""""""""""""""
|
""""""""""""""""
|
||||||
|
|||||||
@ -92,6 +92,15 @@ Syntax
|
|||||||
|
|
||||||
see the :doc:`dump image <dump_image>` doc page for details
|
see the :doc:`dump image <dump_image>` doc page for details
|
||||||
|
|
||||||
|
* these keywords apply only to the extxyz dump style
|
||||||
|
* keyword = *forces* or *mass* or *vel*
|
||||||
|
|
||||||
|
.. parsed-literal::
|
||||||
|
|
||||||
|
*forces* arg = *yes* or *no*
|
||||||
|
*mass* arg = *yes* or *no*
|
||||||
|
*vel* arg = *yes* or *no*
|
||||||
|
|
||||||
* these keywords apply only to the */gz* and */zstd* dump styles
|
* these keywords apply only to the */gz* and */zstd* dump styles
|
||||||
* keyword = *compression_level*
|
* keyword = *compression_level*
|
||||||
|
|
||||||
@ -972,9 +981,11 @@ The option defaults are
|
|||||||
* fileper = # of processors
|
* fileper = # of processors
|
||||||
* first = no
|
* first = no
|
||||||
* flush = yes
|
* flush = yes
|
||||||
|
* forces = yes
|
||||||
* format = %d and %g for each integer or floating point value
|
* format = %d and %g for each integer or floating point value
|
||||||
* image = no
|
* image = no
|
||||||
* label = ENTRIES
|
* label = ENTRIES
|
||||||
|
* mass = no
|
||||||
* maxfiles = -1
|
* maxfiles = -1
|
||||||
* nfile = 1
|
* nfile = 1
|
||||||
* pad = 0
|
* pad = 0
|
||||||
@ -990,6 +1001,7 @@ The option defaults are
|
|||||||
* types = numeric
|
* types = numeric
|
||||||
* units = no
|
* units = no
|
||||||
* unwrap = no
|
* unwrap = no
|
||||||
|
* vel = yes
|
||||||
|
|
||||||
* compression_level = 9 (gz variants)
|
* compression_level = 9 (gz variants)
|
||||||
* compression_level = 0 (zstd variants)
|
* compression_level = 0 (zstd variants)
|
||||||
|
|||||||
@ -365,6 +365,7 @@ accelerated styles exist.
|
|||||||
* :doc:`qeq/fire <fix_qeq>` - charge equilibration via FIRE minimizer
|
* :doc:`qeq/fire <fix_qeq>` - charge equilibration via FIRE minimizer
|
||||||
* :doc:`qeq/point <fix_qeq>` - charge equilibration via point method
|
* :doc:`qeq/point <fix_qeq>` - charge equilibration via point method
|
||||||
* :doc:`qeq/reaxff <fix_qeq_reaxff>` - charge equilibration for ReaxFF potential
|
* :doc:`qeq/reaxff <fix_qeq_reaxff>` - charge equilibration for ReaxFF potential
|
||||||
|
* :doc:`qeq/rel/reaxff <fix_qeq_rel_reaxff>` - charge equilibration for ReaxFF potential with alternate efield implementation
|
||||||
* :doc:`qeq/shielded <fix_qeq>` - charge equilibration via shielded method
|
* :doc:`qeq/shielded <fix_qeq>` - charge equilibration via shielded method
|
||||||
* :doc:`qeq/slater <fix_qeq>` - charge equilibration via Slater method
|
* :doc:`qeq/slater <fix_qeq>` - charge equilibration via Slater method
|
||||||
* :doc:`qmmm <fix_qmmm>` - functionality to enable a quantum mechanics/molecular mechanics coupling
|
* :doc:`qmmm <fix_qmmm>` - functionality to enable a quantum mechanics/molecular mechanics coupling
|
||||||
|
|||||||
@ -123,8 +123,10 @@ components in non-periodic directions.
|
|||||||
Related commands
|
Related commands
|
||||||
""""""""""""""""
|
""""""""""""""""
|
||||||
|
|
||||||
:doc:`pair_style reaxff <pair_reaxff>`, :doc:`fix qeq/reaxff <fix_qeq_reaxff>`,
|
:doc:`pair_style reaxff <pair_reaxff>`,
|
||||||
:doc:`fix qtpi/reaxff <fix_qtpie_reaxff>`
|
:doc:`fix qeq/reaxff <fix_qeq_reaxff>`,
|
||||||
|
:doc:`fix qtpie/reaxff <fix_qtpie_reaxff>`,
|
||||||
|
:doc:`fix qeq/rel/reaxff <fix_qeq_rel_reaxff>`
|
||||||
|
|
||||||
Default
|
Default
|
||||||
"""""""
|
"""""""
|
||||||
|
|||||||
@ -14,7 +14,7 @@ Syntax
|
|||||||
* adapt = style name of this fix command
|
* adapt = style name of this fix command
|
||||||
* N = adapt simulation settings every this many timesteps
|
* N = adapt simulation settings every this many timesteps
|
||||||
* one or more attribute/arg pairs may be appended
|
* one or more attribute/arg pairs may be appended
|
||||||
* attribute = *pair* or *bond* or *angle* or *kspace* or *atom*
|
* attribute = *pair* or *bond* or *angle* or *improper* or *kspace* or *atom*
|
||||||
|
|
||||||
.. parsed-literal::
|
.. parsed-literal::
|
||||||
|
|
||||||
@ -33,6 +33,11 @@ Syntax
|
|||||||
aparam = parameter to adapt over time
|
aparam = parameter to adapt over time
|
||||||
I = type angle to set parameter for (integer or type label)
|
I = type angle to set parameter for (integer or type label)
|
||||||
v_name = variable with name that calculates value of aparam
|
v_name = variable with name that calculates value of aparam
|
||||||
|
*improper* args = istyle iparam I v_name
|
||||||
|
istyle = improper style name (e.g., cvff)
|
||||||
|
iparam = parameter to adapt over time
|
||||||
|
I = type improper to set parameter for (integer or type label)
|
||||||
|
v_name = variable with name that calculates value of iparam
|
||||||
*kspace* arg = v_name
|
*kspace* arg = v_name
|
||||||
v_name = variable with name that calculates scale factor on :math:`k`-space terms
|
v_name = variable with name that calculates scale factor on :math:`k`-space terms
|
||||||
*atom* args = atomparam v_name
|
*atom* args = atomparam v_name
|
||||||
@ -428,6 +433,56 @@ this fix uses to reset theta0 needs to generate values in radians.
|
|||||||
|
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
.. versionadded:: TBD
|
||||||
|
|
||||||
|
The *improper* keyword uses the specified variable to change the value of
|
||||||
|
an improper coefficient over time, very similar to how the *angle* keyword
|
||||||
|
operates. The only difference is that now an improper coefficient for a
|
||||||
|
given improper type is adapted.
|
||||||
|
|
||||||
|
A wild-card asterisk can be used in place of or in conjunction with the
|
||||||
|
improper type argument to set the coefficients for multiple improper types.
|
||||||
|
This takes the form "\*" or "\*n" or "m\*" or "m\*n". If :math:`N` is
|
||||||
|
the number of improper types, then an asterisk with no numeric values means
|
||||||
|
all types from 1 to :math:`N`. A leading asterisk means all types from
|
||||||
|
1 to n (inclusive). A trailing asterisk means all types from m to
|
||||||
|
:math:`N` (inclusive). A middle asterisk means all types from m to n
|
||||||
|
(inclusive).
|
||||||
|
|
||||||
|
If :doc:`improper_style hybrid <improper_hybrid>` is used, *istyle* should be a
|
||||||
|
sub-style name. The improper styles that currently work with fix adapt are:
|
||||||
|
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
| :doc:`amoeba <improper_amoeba>` | k | type impropers |
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
| :doc:`class2 <improper_class2>` | k,chi0 | type impropers |
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
| :doc:`cossq <improper_cossq>` | k,chi0 | type impropers |
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
| :doc:`cvff <improper_cvff>` | k,d,n | type impropers |
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
| :doc:`distance <improper_distance>` | k2,k4 | type impropers |
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
| :doc:`distharm <improper_distharm>` | k,d0 | type impropers |
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
| :doc:`fourier <improper_fourier>` | k,C0,C1,C2 | type impropers |
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
| :doc:`harmonic <improper_harmonic>` | k,chi0 | type impropers |
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
| :doc:`inversion/harmonic <improper_inversion_harmonic>` | k,w0 | type impropers |
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
| :doc:`ring <improper_ring>` | k,theta0 | type impropers |
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
| :doc:`umbrella <improper_umbrella>` | k,w0 | type impropers |
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
| :doc:`sqdistharm <improper_sqdistharm>` | k | type impropers |
|
||||||
|
+---------------------------------------------------------+----------------+----------------+
|
||||||
|
|
||||||
|
Note that internally, chi0 and theta0 are stored in radians, so the variable
|
||||||
|
this fix uses to reset chi0 or theta0 needs to generate values in radians.
|
||||||
|
|
||||||
|
----------
|
||||||
|
|
||||||
The *kspace* keyword uses the specified variable as a scale factor on
|
The *kspace* keyword uses the specified variable as a scale factor on
|
||||||
the energy, forces, virial calculated by whatever :math:`k`-space solver is
|
the energy, forces, virial calculated by whatever :math:`k`-space solver is
|
||||||
defined by the :doc:`kspace_style <kspace_style>` command. If the
|
defined by the :doc:`kspace_style <kspace_style>` command. If the
|
||||||
|
|||||||
@ -59,7 +59,7 @@ extracted from the :doc:`pair_style reaxff <pair_reaxff>` command and
|
|||||||
the ReaxFF force field file it reads in. If a file name is specified
|
the ReaxFF force field file it reads in. If a file name is specified
|
||||||
for *params*, then the parameters are taken from the specified file
|
for *params*, then the parameters are taken from the specified file
|
||||||
and the file must contain one line for each atom type. The latter
|
and the file must contain one line for each atom type. The latter
|
||||||
form must be used when performing QeQ with a non-ReaxFF potential.
|
form must be used when performing QEq with a non-ReaxFF potential.
|
||||||
Each line should be formatted as follows:
|
Each line should be formatted as follows:
|
||||||
|
|
||||||
.. parsed-literal::
|
.. parsed-literal::
|
||||||
@ -140,7 +140,8 @@ Related commands
|
|||||||
""""""""""""""""
|
""""""""""""""""
|
||||||
|
|
||||||
:doc:`pair_style reaxff <pair_reaxff>`, :doc:`fix qeq/shielded <fix_qeq>`,
|
:doc:`pair_style reaxff <pair_reaxff>`, :doc:`fix qeq/shielded <fix_qeq>`,
|
||||||
:doc:`fix acks2/reaxff <fix_acks2_reaxff>`, :doc:`fix qtpie/reaxff <fix_qtpie_reaxff>`
|
:doc:`fix acks2/reaxff <fix_acks2_reaxff>`, :doc:`fix qtpie/reaxff <fix_qtpie_reaxff>`,
|
||||||
|
:doc:`fix qeq/rel/reaxff <fix_qeq_rel_reaxff>`
|
||||||
|
|
||||||
Default
|
Default
|
||||||
"""""""
|
"""""""
|
||||||
|
|||||||
195
doc/src/fix_qeq_rel_reaxff.rst
Normal file
195
doc/src/fix_qeq_rel_reaxff.rst
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
.. index:: fix qeq/rel/reaxff
|
||||||
|
|
||||||
|
fix qeq/rel/reaxff command
|
||||||
|
==========================
|
||||||
|
|
||||||
|
Syntax
|
||||||
|
""""""
|
||||||
|
|
||||||
|
.. code-block:: LAMMPS
|
||||||
|
|
||||||
|
fix ID group-ID qeq/rel/reaxff Nevery cutlo cuthi tolerance params gfile args
|
||||||
|
|
||||||
|
* ID, group-ID are documented in :doc:`fix <fix>` command
|
||||||
|
* qeq/rel/reaxff = style name of this fix command
|
||||||
|
* Nevery = perform QEqR every this many steps
|
||||||
|
* cutlo,cuthi = lo and hi cutoff for Taper radius
|
||||||
|
* tolerance = precision to which charges will be equilibrated
|
||||||
|
* params = reaxff or a filename
|
||||||
|
* gfile = the name of a file containing Gaussian orbital exponents
|
||||||
|
* one or more keywords or keyword/value pairs may be appended
|
||||||
|
|
||||||
|
.. parsed-literal::
|
||||||
|
|
||||||
|
keyword = *scale* or *maxiter* or *nowarn*
|
||||||
|
*scale* beta = set value of scaling factor *beta* (determines strength of electric polarization)
|
||||||
|
*maxiter* N = limit the number of iterations to *N*
|
||||||
|
*nowarn* = do not print a warning message if the maximum number of iterations is reached
|
||||||
|
|
||||||
|
Examples
|
||||||
|
""""""""
|
||||||
|
|
||||||
|
.. code-block:: LAMMPS
|
||||||
|
|
||||||
|
fix 1 all qeq/rel/reaxff 1 0.0 10.0 1.0e-6 reaxff exp.qeqr
|
||||||
|
fix 1 all qeq/rel/reaxff 1 0.0 10.0 1.0e-6 params.qeqr exp.qeqr scale 1.5 maxiter 500 nowarn
|
||||||
|
|
||||||
|
Description
|
||||||
|
"""""""""""
|
||||||
|
|
||||||
|
.. versionadded:: 19Nov2024
|
||||||
|
|
||||||
|
This fix implements the QEqR method for charge equilibration, which
|
||||||
|
differs from the QEq charge equilibration method :ref:`(Rappe and
|
||||||
|
Goddard) <Rappe4>` only in how external electric fields are accounted
|
||||||
|
for. This fix therefore raises a warning when used without :doc:`fix
|
||||||
|
efield <fix_efield>` since :doc:`fix qeq/reaxff <fix_qeq_reaxff>` should
|
||||||
|
be used without an external electric field. Charges are computed with
|
||||||
|
the QEqR method by minimizing the electrostatic energy of the system in
|
||||||
|
the same way as the QEq method but where the absolute electronegativity,
|
||||||
|
:math:`\chi_i`, of each atom in the QEq method is replaced with an
|
||||||
|
effective electronegativity given by
|
||||||
|
|
||||||
|
.. math::
|
||||||
|
\chi_{\mathrm{r}i} = \chi_i + \frac{\sum_{j=1}^{N} \beta(\phi_i - \phi_j) S_{ij}}
|
||||||
|
{\sum_{m=1}^{N}S_{im}},
|
||||||
|
|
||||||
|
where :math:`N` is the number of atoms in the system, :math:`\beta` is a
|
||||||
|
scaling factor, :math:`\phi_i` and :math:`\phi_j` are the electric
|
||||||
|
potentials at the positions of atoms :math:`i` and :math:`j` due to the
|
||||||
|
external electric field and :math:`S_{ij}` is the overlap integral
|
||||||
|
between atoms :math:`i` and :math:`j`. This formulation is advantageous
|
||||||
|
over the method used by :doc:`fix qeq/reaxff <fix_qeq_reaxff>` to
|
||||||
|
account for an external electric field in that it permits periodic
|
||||||
|
boundaries in the direction of an external electric field and in that it
|
||||||
|
does not worsen long-range charge transfer seen with QEq.
|
||||||
|
|
||||||
|
This fix is typically used in conjunction with the ReaxFF force field
|
||||||
|
model as implemented in the :doc:`pair_style reaxff <pair_reaxff>`
|
||||||
|
command, but it can be used with any potential in LAMMPS, so long as it
|
||||||
|
defines and uses charges on each atom. For more technical details about
|
||||||
|
the charge equilibration performed by *fix qeq/rel/reaxff*, which is the
|
||||||
|
same as in :doc:`fix qeq/reaxff <fix_qeq_reaxff>` except for the use of
|
||||||
|
:math:`\chi_{\mathrm{r}i}`, please refer to :ref:`(Aktulga)
|
||||||
|
<qeq-Aktulga3>`. To be explicit, *fix qeq/rel/reaxff* replaces
|
||||||
|
:math:`\chi_k` of eq. 3 in :ref:`(Aktulga) <qeq-Aktulga3>` with
|
||||||
|
:math:`\chi_{\mathrm{r}k}` when an external electric field is applied.
|
||||||
|
|
||||||
|
This fix requires the absolute electronegativity, :math:`\chi`, in eV,
|
||||||
|
the self-Coulomb potential, :math:`\eta`, in eV, and the shielded
|
||||||
|
Coulomb constant, :math:`\gamma`, in :math:`\AA^{-1}`. If the *params*
|
||||||
|
setting above is the word "reaxff", then these are extracted from the
|
||||||
|
:doc:`pair_style reaxff <pair_reaxff>` command and the ReaxFF force
|
||||||
|
field file it reads in. If a file name is specified for *params*, then
|
||||||
|
the parameters are taken from the specified file and the file must
|
||||||
|
contain one line for each atom type. The latter form must be used when
|
||||||
|
using this fix with a non-ReaxFF potential. Each line should be
|
||||||
|
formatted as follows, ensuring that the parameters are given in units of
|
||||||
|
eV, eV, and :math:`\AA^{-1}`, respectively:
|
||||||
|
|
||||||
|
.. parsed-literal::
|
||||||
|
|
||||||
|
itype chi eta gamma
|
||||||
|
|
||||||
|
where *itype* is the atom type from 1 to Ntypes. Note that eta is
|
||||||
|
defined here as twice the eta value in the ReaxFF file.
|
||||||
|
|
||||||
|
The overlap integrals :math:`S_{ij}` are computed by using normalized 1s
|
||||||
|
Gaussian type orbitals. The Gaussian orbital exponents, :math:`\alpha`,
|
||||||
|
that are needed to compute the overlap integrals are taken from the file
|
||||||
|
given by *gfile*. This file must contain one line for each atom type
|
||||||
|
and provide the Gaussian orbital exponent for each atom type in units of
|
||||||
|
inverse square Bohr radius. Each line should be formatted as follows:
|
||||||
|
|
||||||
|
.. parsed-literal::
|
||||||
|
|
||||||
|
itype alpha
|
||||||
|
|
||||||
|
Empty lines or any text following the pound sign (#) are ignored. An
|
||||||
|
example *gfile* for a system with two atom types is
|
||||||
|
|
||||||
|
.. parsed-literal::
|
||||||
|
|
||||||
|
# An example gfile. Exponents are taken from Table 2.2 of Chen, J. (2009).
|
||||||
|
# Theory and applications of fluctuating-charge models.
|
||||||
|
# The units of the exponents are 1 / (Bohr radius)^2 .
|
||||||
|
1 0.2240 # O
|
||||||
|
2 0.5434 # H
|
||||||
|
|
||||||
|
The optional *scale* keyword sets the value of :math:`\beta` in the
|
||||||
|
equation for :math:`\chi_{\mathrm{r}i}`. The default value is 1.0.
|
||||||
|
|
||||||
|
The optional *maxiter* keyword allows changing the maximum number of
|
||||||
|
iterations in the linear solver. The default value is 200.
|
||||||
|
|
||||||
|
The optional *nowarn* keyword silences the warning message printed when
|
||||||
|
the maximum number of iterations is reached. This can be useful for
|
||||||
|
comparing serial and parallel results where having the same fixed number
|
||||||
|
of iterations is desired, which can be achieved by using a very small
|
||||||
|
tolerance and setting *maxiter* to the desired number of iterations.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
In order to solve the self-consistent equations for electronegativity
|
||||||
|
equalization, LAMMPS imposes the additional constraint that all the
|
||||||
|
charges in the fix group must add up to zero. The initial charge
|
||||||
|
assignments should also satisfy this constraint. LAMMPS will print a
|
||||||
|
warning if that is not the case.
|
||||||
|
|
||||||
|
Restart, fix_modify, output, run start/stop, minimize info
|
||||||
|
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
||||||
|
|
||||||
|
No information about this fix is written to :doc:`binary restart files
|
||||||
|
<restart>`. This fix computes a global scalar (the number of
|
||||||
|
iterations) and a per-atom vector (the effective electronegativity),
|
||||||
|
which can be accessed by various :doc:`output commands <Howto_output>`.
|
||||||
|
No parameter of this fix can be used with the *start/stop* keywords of
|
||||||
|
the :doc:`run <run>` command.
|
||||||
|
|
||||||
|
This fix is invoked during :doc:`energy minimization <minimize>`.
|
||||||
|
|
||||||
|
Restrictions
|
||||||
|
""""""""""""
|
||||||
|
|
||||||
|
This fix is part of the REAXFF package. It is only enabled if LAMMPS
|
||||||
|
was built with that package. See the :doc:`Build package
|
||||||
|
<Build_package>` page for more info.
|
||||||
|
|
||||||
|
This fix does not correctly handle interactions involving multiple
|
||||||
|
periodic images of the same atom. Hence, it should not be used for
|
||||||
|
periodic cell dimensions smaller than the non-bonded cutoff radius,
|
||||||
|
which is typically :math:`10~\AA` for ReaxFF simulations.
|
||||||
|
|
||||||
|
This fix may be used in combination with :doc:`fix efield <fix_efield>`
|
||||||
|
and will apply the external electric field during charge equilibration,
|
||||||
|
but there may be only one fix efield instance used and the electric
|
||||||
|
field must be applied to all atoms in the system. Consequently, *fix
|
||||||
|
efield* must be used with *group-ID* all and must not be used with the
|
||||||
|
keyword *region*. Equal-style variables can be used for electric field
|
||||||
|
vector components without any further settings. Atom-style variables can
|
||||||
|
be used for spatially-varying electric field vector components, but the
|
||||||
|
resulting electric potential must be specified as an atom-style variable
|
||||||
|
using the *potential* keyword for *fix efield*.
|
||||||
|
|
||||||
|
Related commands
|
||||||
|
""""""""""""""""
|
||||||
|
|
||||||
|
:doc:`pair_style reaxff <pair_reaxff>`, :doc:`fix qeq/reaxff <fix_qeq_reaxff>`,
|
||||||
|
:doc:`fix acks2/reaxff <fix_acks2_reaxff>`, :doc:`fix qtpie/reaxff <fix_qtpie_reaxff>`
|
||||||
|
|
||||||
|
Default
|
||||||
|
"""""""
|
||||||
|
|
||||||
|
scale = 1.0 and maxiter = 200
|
||||||
|
|
||||||
|
----------
|
||||||
|
|
||||||
|
.. _Rappe4:
|
||||||
|
|
||||||
|
**(Rappe)** Rappe and Goddard III, Journal of Physical Chemistry, 95,
|
||||||
|
3358-3363 (1991).
|
||||||
|
|
||||||
|
.. _qeq-Aktulga3:
|
||||||
|
|
||||||
|
**(Aktulga)** Aktulga, Fogarty, Pandit, Grama, Parallel Computing, 38,
|
||||||
|
245-259 (2012).
|
||||||
@ -21,8 +21,10 @@ Syntax
|
|||||||
|
|
||||||
.. parsed-literal::
|
.. parsed-literal::
|
||||||
|
|
||||||
keyword = *maxiter*
|
keyword = *scale* or *maxiter* or *nowarn*
|
||||||
|
*scale* beta = set value of scaling factor *beta* (determines strength of electric polarization)
|
||||||
*maxiter* N = limit the number of iterations to *N*
|
*maxiter* N = limit the number of iterations to *N*
|
||||||
|
*nowarn* = do not print a warning message if the maximum number of iterations is reached
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
""""""""
|
""""""""
|
||||||
@ -30,7 +32,7 @@ Examples
|
|||||||
.. code-block:: LAMMPS
|
.. code-block:: LAMMPS
|
||||||
|
|
||||||
fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff exp.qtpie
|
fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff exp.qtpie
|
||||||
fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 params.qtpie exp.qtpie maxiter 500
|
fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 params.qtpie exp.qtpie scale 1.5 maxiter 500 nowarn
|
||||||
|
|
||||||
Description
|
Description
|
||||||
"""""""""""
|
"""""""""""
|
||||||
@ -46,7 +48,7 @@ same way as the QEq method but where the absolute electronegativity,
|
|||||||
electronegativity given by :ref:`(Chen) <qtpie-Chen>`
|
electronegativity given by :ref:`(Chen) <qtpie-Chen>`
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
\chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j) S_{ij}}
|
\tilde{\chi}_{i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j) S_{ij}}
|
||||||
{\sum_{m=1}^{N}S_{im}},
|
{\sum_{m=1}^{N}S_{im}},
|
||||||
|
|
||||||
which acts to penalize long-range charge transfer seen with the QEq charge
|
which acts to penalize long-range charge transfer seen with the QEq charge
|
||||||
@ -61,11 +63,11 @@ electric field by using the effective electronegativity given in
|
|||||||
:ref:`(Gergs) <Gergs>`:
|
:ref:`(Gergs) <Gergs>`:
|
||||||
|
|
||||||
.. math::
|
.. math::
|
||||||
\chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \phi_i - \phi_j) S_{ij}}
|
\tilde{\chi}_{\mathrm{r}i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \beta(\phi_i - \phi_j)) S_{ij}}
|
||||||
{\sum_{m=1}^{N}S_{im}},
|
{\sum_{m=1}^{N}S_{im}},
|
||||||
|
|
||||||
where :math:`\phi_i` and :math:`\phi_j` are the electric
|
where :math:`\beta` is a scaling factor and :math:`\phi_i` and :math:`\phi_j`
|
||||||
potentials at the positions of atom :math:`i` and :math:`j`
|
are the electric potentials at the positions of atoms :math:`i` and :math:`j`
|
||||||
due to the external electric field.
|
due to the external electric field.
|
||||||
|
|
||||||
This fix is typically used in conjunction with the ReaxFF force
|
This fix is typically used in conjunction with the ReaxFF force
|
||||||
@ -74,9 +76,12 @@ command, but it can be used with any potential in LAMMPS, so long as it
|
|||||||
defines and uses charges on each atom. For more technical details about the
|
defines and uses charges on each atom. For more technical details about the
|
||||||
charge equilibration performed by *fix qtpie/reaxff*, which is the same as in
|
charge equilibration performed by *fix qtpie/reaxff*, which is the same as in
|
||||||
:doc:`fix qeq/reaxff <fix_qeq_reaxff>` except for the use of
|
:doc:`fix qeq/reaxff <fix_qeq_reaxff>` except for the use of
|
||||||
:math:`\chi_{\mathrm{eff},i}`, please refer to :ref:`(Aktulga) <qeq-Aktulga2>`.
|
:math:`\tilde{\chi}_{i}` or :math:`\tilde{\chi}_{\mathrm{r}i}`,
|
||||||
|
please refer to :ref:`(Aktulga) <qeq-Aktulga2>`.
|
||||||
To be explicit, this fix replaces :math:`\chi_k` of eq. 3 in
|
To be explicit, this fix replaces :math:`\chi_k` of eq. 3 in
|
||||||
:ref:`(Aktulga) <qeq-Aktulga2>` with :math:`\chi_{\mathrm{eff},k}`.
|
:ref:`(Aktulga) <qeq-Aktulga2>` with :math:`\tilde{\chi}_{k}` when no external
|
||||||
|
electric field is applied and with :math:`\tilde{\chi}_{\mathrm{r}k}` when an
|
||||||
|
external electric field is applied.
|
||||||
|
|
||||||
This fix requires the absolute electronegativity, :math:`\chi`, in eV, the
|
This fix requires the absolute electronegativity, :math:`\chi`, in eV, the
|
||||||
self-Coulomb potential, :math:`\eta`, in eV, and the shielded Coulomb
|
self-Coulomb potential, :math:`\eta`, in eV, and the shielded Coulomb
|
||||||
@ -97,7 +102,7 @@ respectively:
|
|||||||
where *itype* is the atom type from 1 to Ntypes. Note that eta is
|
where *itype* is the atom type from 1 to Ntypes. Note that eta is
|
||||||
defined here as twice the eta value in the ReaxFF file.
|
defined here as twice the eta value in the ReaxFF file.
|
||||||
|
|
||||||
The overlap integrals in the equation for :math:`\chi_{\mathrm{eff},i}`
|
The overlap integrals :math:`S_{ij}`
|
||||||
are computed by using normalized 1s Gaussian type orbitals. The Gaussian
|
are computed by using normalized 1s Gaussian type orbitals. The Gaussian
|
||||||
orbital exponents, :math:`\alpha`, that are needed to compute the overlap
|
orbital exponents, :math:`\alpha`, that are needed to compute the overlap
|
||||||
integrals are taken from the file given by *gfile*.
|
integrals are taken from the file given by *gfile*.
|
||||||
@ -120,9 +125,20 @@ Empty lines or any text following the pound sign (#) are ignored. An example
|
|||||||
1 0.2240 # O
|
1 0.2240 # O
|
||||||
2 0.5434 # H
|
2 0.5434 # H
|
||||||
|
|
||||||
|
The optional *scale* keyword sets the value of :math:`\beta` in the equation for
|
||||||
|
:math:`\tilde{\chi}_{\mathrm{r}i}`. This keyword only affects the computed charges
|
||||||
|
when :doc:`fix efield <fix_efield>` is used. The default value is 1.0.
|
||||||
|
|
||||||
The optional *maxiter* keyword allows changing the maximum number
|
The optional *maxiter* keyword allows changing the maximum number
|
||||||
of iterations in the linear solver. The default value is 200.
|
of iterations in the linear solver. The default value is 200.
|
||||||
|
|
||||||
|
The optional *nowarn* keyword silences the warning message printed
|
||||||
|
when the maximum number of iterations is reached. This can be
|
||||||
|
useful for comparing serial and parallel results where having the
|
||||||
|
same fixed number of iterations is desired, which can be achieved
|
||||||
|
by using a very small tolerance and setting *maxiter* to the desired
|
||||||
|
number of iterations.
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
In order to solve the self-consistent equations for electronegativity
|
In order to solve the self-consistent equations for electronegativity
|
||||||
@ -170,12 +186,13 @@ Related commands
|
|||||||
""""""""""""""""
|
""""""""""""""""
|
||||||
|
|
||||||
:doc:`pair_style reaxff <pair_reaxff>`, :doc:`fix qeq/reaxff <fix_qeq_reaxff>`,
|
:doc:`pair_style reaxff <pair_reaxff>`, :doc:`fix qeq/reaxff <fix_qeq_reaxff>`,
|
||||||
:doc:`fix acks2/reaxff <fix_acks2_reaxff>`
|
:doc:`fix acks2/reaxff <fix_acks2_reaxff>`,
|
||||||
|
:doc:`fix qeq/rel/reaxff <fix_qeq_rel_reaxff>`
|
||||||
|
|
||||||
Default
|
Default
|
||||||
"""""""
|
"""""""
|
||||||
|
|
||||||
maxiter 200
|
scale = 1.0 and maxiter = 200
|
||||||
|
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
|||||||
@ -725,6 +725,7 @@ dashpot
|
|||||||
dat
|
dat
|
||||||
datafile
|
datafile
|
||||||
datatype
|
datatype
|
||||||
|
dataset
|
||||||
datums
|
datums
|
||||||
Davidchack
|
Davidchack
|
||||||
Daw
|
Daw
|
||||||
@ -3120,9 +3121,11 @@ qE
|
|||||||
qeff
|
qeff
|
||||||
qelectron
|
qelectron
|
||||||
qeq
|
qeq
|
||||||
|
qeqr
|
||||||
Qamar
|
Qamar
|
||||||
QeQ
|
QeQ
|
||||||
QEq
|
QEq
|
||||||
|
QEqR
|
||||||
qfactor
|
qfactor
|
||||||
qfile
|
qfile
|
||||||
qi
|
qi
|
||||||
|
|||||||
29
examples/reaxff/water/in.water.qeqr
Normal file
29
examples/reaxff/water/in.water.qeqr
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
# Water with QEqR
|
||||||
|
|
||||||
|
boundary p p p
|
||||||
|
units real
|
||||||
|
atom_style charge
|
||||||
|
|
||||||
|
read_data data.water
|
||||||
|
|
||||||
|
variable x index 1
|
||||||
|
variable y index 1
|
||||||
|
variable z index 1
|
||||||
|
|
||||||
|
replicate $x $y $z
|
||||||
|
|
||||||
|
pair_style reaxff NULL safezone 3.0 mincap 150
|
||||||
|
pair_coeff * * qeq_ff.water O H
|
||||||
|
neighbor 0.5 bin
|
||||||
|
neigh_modify every 1 delay 0 check yes
|
||||||
|
|
||||||
|
velocity all create 300.0 4928459 rot yes dist gaussian
|
||||||
|
|
||||||
|
fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt
|
||||||
|
fix 2 all nvt temp 300 300 50.0
|
||||||
|
|
||||||
|
timestep 0.5
|
||||||
|
thermo 10
|
||||||
|
thermo_style custom step temp press density vol
|
||||||
|
|
||||||
|
run 20
|
||||||
30
examples/reaxff/water/in.water.qeqr.field
Normal file
30
examples/reaxff/water/in.water.qeqr.field
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
# Water with QEqR
|
||||||
|
|
||||||
|
boundary p p p
|
||||||
|
units real
|
||||||
|
atom_style charge
|
||||||
|
|
||||||
|
read_data data.water
|
||||||
|
|
||||||
|
variable x index 1
|
||||||
|
variable y index 1
|
||||||
|
variable z index 1
|
||||||
|
|
||||||
|
replicate $x $y $z
|
||||||
|
|
||||||
|
pair_style reaxff NULL safezone 3.0 mincap 150
|
||||||
|
pair_coeff * * qeq_ff.water O H
|
||||||
|
neighbor 0.5 bin
|
||||||
|
neigh_modify every 1 delay 0 check yes
|
||||||
|
|
||||||
|
velocity all create 300.0 4928459 rot yes dist gaussian
|
||||||
|
|
||||||
|
fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt
|
||||||
|
fix 2 all nvt temp 300 300 50.0
|
||||||
|
fix 3 all efield 0.0 0.0 0.05
|
||||||
|
|
||||||
|
timestep 0.5
|
||||||
|
thermo 10
|
||||||
|
thermo_style custom step temp press density vol
|
||||||
|
|
||||||
|
run 20
|
||||||
115
examples/reaxff/water/log.20Mar25.reaxff.water-qeqr-field.g++.1
Normal file
115
examples/reaxff/water/log.20Mar25.reaxff.water-qeqr-field.g++.1
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
LAMMPS (4 Feb 2025 - Development - patch_4Feb2025-444-gbb8b6590d5-modified)
|
||||||
|
using 1 OpenMP thread(s) per MPI task
|
||||||
|
# Water with QEqR
|
||||||
|
|
||||||
|
boundary p p p
|
||||||
|
units real
|
||||||
|
atom_style charge
|
||||||
|
|
||||||
|
read_data data.water
|
||||||
|
Reading data file ...
|
||||||
|
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
|
||||||
|
1 by 1 by 1 MPI processor grid
|
||||||
|
reading atoms ...
|
||||||
|
3000 atoms
|
||||||
|
read_data CPU = 0.053 seconds
|
||||||
|
|
||||||
|
variable x index 1
|
||||||
|
variable y index 1
|
||||||
|
variable z index 1
|
||||||
|
|
||||||
|
replicate $x $y $z
|
||||||
|
replicate 1 $y $z
|
||||||
|
replicate 1 1 $z
|
||||||
|
replicate 1 1 1
|
||||||
|
Replication is creating a 1x1x1 = 1 times larger system...
|
||||||
|
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
|
||||||
|
1 by 1 by 1 MPI processor grid
|
||||||
|
3000 atoms
|
||||||
|
replicate CPU = 0.001 seconds
|
||||||
|
|
||||||
|
pair_style reaxff NULL safezone 3.0 mincap 150
|
||||||
|
pair_coeff * * qeq_ff.water O H
|
||||||
|
WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:300)
|
||||||
|
neighbor 0.5 bin
|
||||||
|
neigh_modify every 1 delay 0 check yes
|
||||||
|
|
||||||
|
velocity all create 300.0 4928459 rot yes dist gaussian
|
||||||
|
|
||||||
|
fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt
|
||||||
|
fix 2 all nvt temp 300 300 50.0
|
||||||
|
fix 3 all efield 0.0 0.0 0.05
|
||||||
|
|
||||||
|
timestep 0.5
|
||||||
|
thermo 10
|
||||||
|
thermo_style custom step temp press density vol
|
||||||
|
|
||||||
|
run 20
|
||||||
|
|
||||||
|
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||||
|
|
||||||
|
Your simulation uses code contributions which should be cited:
|
||||||
|
|
||||||
|
- pair reaxff command: doi:10.1016/j.parco.2011.08.005
|
||||||
|
|
||||||
|
@Article{Aktulga12,
|
||||||
|
author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama},
|
||||||
|
title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques},
|
||||||
|
journal = {Parallel Computing},
|
||||||
|
year = 2012,
|
||||||
|
volume = 38,
|
||||||
|
number = {4--5},
|
||||||
|
pages = {245--259}
|
||||||
|
}
|
||||||
|
|
||||||
|
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||||
|
|
||||||
|
Neighbor list info ...
|
||||||
|
update: every = 1 steps, delay = 0 steps, check = yes
|
||||||
|
max neighbors/atom: 2000, page size: 100000
|
||||||
|
master list distance cutoff = 10.5
|
||||||
|
ghost atom cutoff = 10.5
|
||||||
|
binsize = 5.25, bins = 6 6 6
|
||||||
|
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||||
|
(1) pair reaxff, perpetual
|
||||||
|
attributes: half, newton off, ghost
|
||||||
|
pair build: half/bin/ghost/newtoff
|
||||||
|
stencil: full/ghost/bin/3d
|
||||||
|
bin: standard
|
||||||
|
(2) fix qeqr/reaxff, perpetual, copy from (1)
|
||||||
|
attributes: half, newton off
|
||||||
|
pair build: copy
|
||||||
|
stencil: none
|
||||||
|
bin: none
|
||||||
|
Per MPI rank memory allocation (min/avg/max) = 539.2 | 539.2 | 539.2 Mbytes
|
||||||
|
Step Temp Press Density Volume
|
||||||
|
0 300 778.75601 1 29915.273
|
||||||
|
10 301.42845 5423.6612 1 29915.273
|
||||||
|
20 298.24707 1549.2257 1 29915.273
|
||||||
|
Loop time of 10.6859 on 1 procs for 20 steps with 3000 atoms
|
||||||
|
|
||||||
|
Performance: 0.081 ns/day, 296.830 hours/ns, 1.872 timesteps/s, 5.615 katom-step/s
|
||||||
|
100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||||
|
|
||||||
|
MPI task timing breakdown:
|
||||||
|
Section | min time | avg time | max time |%varavg| %total
|
||||||
|
---------------------------------------------------------------
|
||||||
|
Pair | 4.7595 | 4.7595 | 4.7595 | 0.0 | 44.54
|
||||||
|
Neigh | 0.17605 | 0.17605 | 0.17605 | 0.0 | 1.65
|
||||||
|
Comm | 0.0017511 | 0.0017511 | 0.0017511 | 0.0 | 0.02
|
||||||
|
Output | 8.3809e-05 | 8.3809e-05 | 8.3809e-05 | 0.0 | 0.00
|
||||||
|
Modify | 5.748 | 5.748 | 5.748 | 0.0 | 53.79
|
||||||
|
Other | | 0.0005279 | | | 0.00
|
||||||
|
|
||||||
|
Nlocal: 3000 ave 3000 max 3000 min
|
||||||
|
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||||
|
Nghost: 11075 ave 11075 max 11075 min
|
||||||
|
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||||
|
Neighs: 971785 ave 971785 max 971785 min
|
||||||
|
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||||
|
|
||||||
|
Total # of neighbors = 971785
|
||||||
|
Ave neighs/atom = 323.92833
|
||||||
|
Neighbor list builds = 2
|
||||||
|
Dangerous builds = 0
|
||||||
|
Total wall time: 0:00:12
|
||||||
115
examples/reaxff/water/log.20Mar25.reaxff.water-qeqr-field.g++.4
Normal file
115
examples/reaxff/water/log.20Mar25.reaxff.water-qeqr-field.g++.4
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
LAMMPS (4 Feb 2025 - Development - patch_4Feb2025-444-gbb8b6590d5-modified)
|
||||||
|
using 1 OpenMP thread(s) per MPI task
|
||||||
|
# Water with QEqR
|
||||||
|
|
||||||
|
boundary p p p
|
||||||
|
units real
|
||||||
|
atom_style charge
|
||||||
|
|
||||||
|
read_data data.water
|
||||||
|
Reading data file ...
|
||||||
|
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
|
||||||
|
1 by 2 by 2 MPI processor grid
|
||||||
|
reading atoms ...
|
||||||
|
3000 atoms
|
||||||
|
read_data CPU = 0.053 seconds
|
||||||
|
|
||||||
|
variable x index 1
|
||||||
|
variable y index 1
|
||||||
|
variable z index 1
|
||||||
|
|
||||||
|
replicate $x $y $z
|
||||||
|
replicate 1 $y $z
|
||||||
|
replicate 1 1 $z
|
||||||
|
replicate 1 1 1
|
||||||
|
Replication is creating a 1x1x1 = 1 times larger system...
|
||||||
|
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
|
||||||
|
1 by 2 by 2 MPI processor grid
|
||||||
|
3000 atoms
|
||||||
|
replicate CPU = 0.002 seconds
|
||||||
|
|
||||||
|
pair_style reaxff NULL safezone 3.0 mincap 150
|
||||||
|
pair_coeff * * qeq_ff.water O H
|
||||||
|
WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:300)
|
||||||
|
neighbor 0.5 bin
|
||||||
|
neigh_modify every 1 delay 0 check yes
|
||||||
|
|
||||||
|
velocity all create 300.0 4928459 rot yes dist gaussian
|
||||||
|
|
||||||
|
fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt
|
||||||
|
fix 2 all nvt temp 300 300 50.0
|
||||||
|
fix 3 all efield 0.0 0.0 0.05
|
||||||
|
|
||||||
|
timestep 0.5
|
||||||
|
thermo 10
|
||||||
|
thermo_style custom step temp press density vol
|
||||||
|
|
||||||
|
run 20
|
||||||
|
|
||||||
|
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||||
|
|
||||||
|
Your simulation uses code contributions which should be cited:
|
||||||
|
|
||||||
|
- pair reaxff command: doi:10.1016/j.parco.2011.08.005
|
||||||
|
|
||||||
|
@Article{Aktulga12,
|
||||||
|
author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama},
|
||||||
|
title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques},
|
||||||
|
journal = {Parallel Computing},
|
||||||
|
year = 2012,
|
||||||
|
volume = 38,
|
||||||
|
number = {4--5},
|
||||||
|
pages = {245--259}
|
||||||
|
}
|
||||||
|
|
||||||
|
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||||
|
|
||||||
|
Neighbor list info ...
|
||||||
|
update: every = 1 steps, delay = 0 steps, check = yes
|
||||||
|
max neighbors/atom: 2000, page size: 100000
|
||||||
|
master list distance cutoff = 10.5
|
||||||
|
ghost atom cutoff = 10.5
|
||||||
|
binsize = 5.25, bins = 6 6 6
|
||||||
|
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||||
|
(1) pair reaxff, perpetual
|
||||||
|
attributes: half, newton off, ghost
|
||||||
|
pair build: half/bin/ghost/newtoff
|
||||||
|
stencil: full/ghost/bin/3d
|
||||||
|
bin: standard
|
||||||
|
(2) fix qeqr/reaxff, perpetual, copy from (1)
|
||||||
|
attributes: half, newton off
|
||||||
|
pair build: copy
|
||||||
|
stencil: none
|
||||||
|
bin: none
|
||||||
|
Per MPI rank memory allocation (min/avg/max) = 260.5 | 262.2 | 263.6 Mbytes
|
||||||
|
Step Temp Press Density Volume
|
||||||
|
0 300 778.75601 1 29915.273
|
||||||
|
10 301.42845 5423.6623 1 29915.273
|
||||||
|
20 298.24708 1549.2264 1 29915.273
|
||||||
|
Loop time of 3.10467 on 4 procs for 20 steps with 3000 atoms
|
||||||
|
|
||||||
|
Performance: 0.278 ns/day, 86.241 hours/ns, 6.442 timesteps/s, 19.326 katom-step/s
|
||||||
|
99.6% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||||
|
|
||||||
|
MPI task timing breakdown:
|
||||||
|
Section | min time | avg time | max time |%varavg| %total
|
||||||
|
---------------------------------------------------------------
|
||||||
|
Pair | 1.668 | 1.6843 | 1.7266 | 1.9 | 54.25
|
||||||
|
Neigh | 0.08549 | 0.086004 | 0.086638 | 0.2 | 2.77
|
||||||
|
Comm | 0.0135 | 0.055821 | 0.072105 | 10.4 | 1.80
|
||||||
|
Output | 4.9632e-05 | 5.4515e-05 | 6.8384e-05 | 0.0 | 0.00
|
||||||
|
Modify | 1.2774 | 1.2781 | 1.2786 | 0.0 | 41.17
|
||||||
|
Other | | 0.000458 | | | 0.01
|
||||||
|
|
||||||
|
Nlocal: 750 ave 760 max 735 min
|
||||||
|
Histogram: 1 0 0 0 1 0 0 0 0 2
|
||||||
|
Nghost: 6230.75 ave 6255 max 6191 min
|
||||||
|
Histogram: 1 0 0 0 0 1 0 0 1 1
|
||||||
|
Neighs: 276996 ave 280553 max 271385 min
|
||||||
|
Histogram: 1 0 0 0 0 1 0 0 0 2
|
||||||
|
|
||||||
|
Total # of neighbors = 1107985
|
||||||
|
Ave neighs/atom = 369.32833
|
||||||
|
Neighbor list builds = 2
|
||||||
|
Dangerous builds = 0
|
||||||
|
Total wall time: 0:00:03
|
||||||
116
examples/reaxff/water/log.20Mar25.reaxff.water-qeqr.g++.1
Normal file
116
examples/reaxff/water/log.20Mar25.reaxff.water-qeqr.g++.1
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
LAMMPS (4 Feb 2025 - Development - patch_4Feb2025-444-gbb8b6590d5-modified)
|
||||||
|
using 1 OpenMP thread(s) per MPI task
|
||||||
|
# Water with QEqR
|
||||||
|
|
||||||
|
boundary p p p
|
||||||
|
units real
|
||||||
|
atom_style charge
|
||||||
|
|
||||||
|
read_data data.water
|
||||||
|
Reading data file ...
|
||||||
|
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
|
||||||
|
1 by 1 by 1 MPI processor grid
|
||||||
|
reading atoms ...
|
||||||
|
3000 atoms
|
||||||
|
read_data CPU = 0.055 seconds
|
||||||
|
|
||||||
|
variable x index 1
|
||||||
|
variable y index 1
|
||||||
|
variable z index 1
|
||||||
|
|
||||||
|
replicate $x $y $z
|
||||||
|
replicate 1 $y $z
|
||||||
|
replicate 1 1 $z
|
||||||
|
replicate 1 1 1
|
||||||
|
Replication is creating a 1x1x1 = 1 times larger system...
|
||||||
|
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
|
||||||
|
1 by 1 by 1 MPI processor grid
|
||||||
|
3000 atoms
|
||||||
|
replicate CPU = 0.001 seconds
|
||||||
|
|
||||||
|
pair_style reaxff NULL safezone 3.0 mincap 150
|
||||||
|
pair_coeff * * qeq_ff.water O H
|
||||||
|
WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:300)
|
||||||
|
neighbor 0.5 bin
|
||||||
|
neigh_modify every 1 delay 0 check yes
|
||||||
|
|
||||||
|
velocity all create 300.0 4928459 rot yes dist gaussian
|
||||||
|
|
||||||
|
fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt
|
||||||
|
fix 2 all nvt temp 300 300 50.0
|
||||||
|
|
||||||
|
timestep 0.5
|
||||||
|
thermo 10
|
||||||
|
thermo_style custom step temp press density vol
|
||||||
|
|
||||||
|
run 20
|
||||||
|
|
||||||
|
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||||
|
|
||||||
|
Your simulation uses code contributions which should be cited:
|
||||||
|
|
||||||
|
- pair reaxff command: doi:10.1016/j.parco.2011.08.005
|
||||||
|
|
||||||
|
@Article{Aktulga12,
|
||||||
|
author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama},
|
||||||
|
title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques},
|
||||||
|
journal = {Parallel Computing},
|
||||||
|
year = 2012,
|
||||||
|
volume = 38,
|
||||||
|
number = {4--5},
|
||||||
|
pages = {245--259}
|
||||||
|
}
|
||||||
|
|
||||||
|
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||||
|
|
||||||
|
WARNING: Use fix qeq/reaxff instead of fix qeqr/reaxff when not using fix efield
|
||||||
|
(src/REAXFF/fix_qtpie_reaxff.cpp:493)
|
||||||
|
Neighbor list info ...
|
||||||
|
update: every = 1 steps, delay = 0 steps, check = yes
|
||||||
|
max neighbors/atom: 2000, page size: 100000
|
||||||
|
master list distance cutoff = 10.5
|
||||||
|
ghost atom cutoff = 10.5
|
||||||
|
binsize = 5.25, bins = 6 6 6
|
||||||
|
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||||
|
(1) pair reaxff, perpetual
|
||||||
|
attributes: half, newton off, ghost
|
||||||
|
pair build: half/bin/ghost/newtoff
|
||||||
|
stencil: full/ghost/bin/3d
|
||||||
|
bin: standard
|
||||||
|
(2) fix qeqr/reaxff, perpetual, copy from (1)
|
||||||
|
attributes: half, newton off
|
||||||
|
pair build: copy
|
||||||
|
stencil: none
|
||||||
|
bin: none
|
||||||
|
Per MPI rank memory allocation (min/avg/max) = 539.2 | 539.2 | 539.2 Mbytes
|
||||||
|
Step Temp Press Density Volume
|
||||||
|
0 300 780.33989 1 29915.273
|
||||||
|
10 301.29205 5433.7414 1 29915.273
|
||||||
|
20 297.90652 1572.6111 1 29915.273
|
||||||
|
Loop time of 6.87447 on 1 procs for 20 steps with 3000 atoms
|
||||||
|
|
||||||
|
Performance: 0.126 ns/day, 190.957 hours/ns, 2.909 timesteps/s, 8.728 katom-step/s
|
||||||
|
100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||||
|
|
||||||
|
MPI task timing breakdown:
|
||||||
|
Section | min time | avg time | max time |%varavg| %total
|
||||||
|
---------------------------------------------------------------
|
||||||
|
Pair | 4.8461 | 4.8461 | 4.8461 | 0.0 | 70.49
|
||||||
|
Neigh | 0.17595 | 0.17595 | 0.17595 | 0.0 | 2.56
|
||||||
|
Comm | 0.001787 | 0.001787 | 0.001787 | 0.0 | 0.03
|
||||||
|
Output | 8.5794e-05 | 8.5794e-05 | 8.5794e-05 | 0.0 | 0.00
|
||||||
|
Modify | 1.8501 | 1.8501 | 1.8501 | 0.0 | 26.91
|
||||||
|
Other | | 0.0004811 | | | 0.01
|
||||||
|
|
||||||
|
Nlocal: 3000 ave 3000 max 3000 min
|
||||||
|
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||||
|
Nghost: 11077 ave 11077 max 11077 min
|
||||||
|
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||||
|
Neighs: 971826 ave 971826 max 971826 min
|
||||||
|
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||||
|
|
||||||
|
Total # of neighbors = 971826
|
||||||
|
Ave neighs/atom = 323.942
|
||||||
|
Neighbor list builds = 2
|
||||||
|
Dangerous builds = 0
|
||||||
|
Total wall time: 0:00:07
|
||||||
116
examples/reaxff/water/log.20Mar25.reaxff.water-qeqr.g++.4
Normal file
116
examples/reaxff/water/log.20Mar25.reaxff.water-qeqr.g++.4
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
LAMMPS (4 Feb 2025 - Development - patch_4Feb2025-444-gbb8b6590d5-modified)
|
||||||
|
using 1 OpenMP thread(s) per MPI task
|
||||||
|
# Water with QEqR
|
||||||
|
|
||||||
|
boundary p p p
|
||||||
|
units real
|
||||||
|
atom_style charge
|
||||||
|
|
||||||
|
read_data data.water
|
||||||
|
Reading data file ...
|
||||||
|
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
|
||||||
|
1 by 2 by 2 MPI processor grid
|
||||||
|
reading atoms ...
|
||||||
|
3000 atoms
|
||||||
|
read_data CPU = 0.082 seconds
|
||||||
|
|
||||||
|
variable x index 1
|
||||||
|
variable y index 1
|
||||||
|
variable z index 1
|
||||||
|
|
||||||
|
replicate $x $y $z
|
||||||
|
replicate 1 $y $z
|
||||||
|
replicate 1 1 $z
|
||||||
|
replicate 1 1 1
|
||||||
|
Replication is creating a 1x1x1 = 1 times larger system...
|
||||||
|
orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046)
|
||||||
|
1 by 2 by 2 MPI processor grid
|
||||||
|
3000 atoms
|
||||||
|
replicate CPU = 0.002 seconds
|
||||||
|
|
||||||
|
pair_style reaxff NULL safezone 3.0 mincap 150
|
||||||
|
pair_coeff * * qeq_ff.water O H
|
||||||
|
WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:300)
|
||||||
|
neighbor 0.5 bin
|
||||||
|
neigh_modify every 1 delay 0 check yes
|
||||||
|
|
||||||
|
velocity all create 300.0 4928459 rot yes dist gaussian
|
||||||
|
|
||||||
|
fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt
|
||||||
|
fix 2 all nvt temp 300 300 50.0
|
||||||
|
|
||||||
|
timestep 0.5
|
||||||
|
thermo 10
|
||||||
|
thermo_style custom step temp press density vol
|
||||||
|
|
||||||
|
run 20
|
||||||
|
|
||||||
|
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||||
|
|
||||||
|
Your simulation uses code contributions which should be cited:
|
||||||
|
|
||||||
|
- pair reaxff command: doi:10.1016/j.parco.2011.08.005
|
||||||
|
|
||||||
|
@Article{Aktulga12,
|
||||||
|
author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama},
|
||||||
|
title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques},
|
||||||
|
journal = {Parallel Computing},
|
||||||
|
year = 2012,
|
||||||
|
volume = 38,
|
||||||
|
number = {4--5},
|
||||||
|
pages = {245--259}
|
||||||
|
}
|
||||||
|
|
||||||
|
CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
|
||||||
|
|
||||||
|
WARNING: Use fix qeq/reaxff instead of fix qeqr/reaxff when not using fix efield
|
||||||
|
(src/REAXFF/fix_qtpie_reaxff.cpp:493)
|
||||||
|
Neighbor list info ...
|
||||||
|
update: every = 1 steps, delay = 0 steps, check = yes
|
||||||
|
max neighbors/atom: 2000, page size: 100000
|
||||||
|
master list distance cutoff = 10.5
|
||||||
|
ghost atom cutoff = 10.5
|
||||||
|
binsize = 5.25, bins = 6 6 6
|
||||||
|
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||||
|
(1) pair reaxff, perpetual
|
||||||
|
attributes: half, newton off, ghost
|
||||||
|
pair build: half/bin/ghost/newtoff
|
||||||
|
stencil: full/ghost/bin/3d
|
||||||
|
bin: standard
|
||||||
|
(2) fix qeqr/reaxff, perpetual, copy from (1)
|
||||||
|
attributes: half, newton off
|
||||||
|
pair build: copy
|
||||||
|
stencil: none
|
||||||
|
bin: none
|
||||||
|
Per MPI rank memory allocation (min/avg/max) = 260.5 | 262.2 | 263.6 Mbytes
|
||||||
|
Step Temp Press Density Volume
|
||||||
|
0 300 780.34006 1 29915.273
|
||||||
|
10 301.29205 5433.7414 1 29915.273
|
||||||
|
20 297.90652 1572.6112 1 29915.273
|
||||||
|
Loop time of 2.52349 on 4 procs for 20 steps with 3000 atoms
|
||||||
|
|
||||||
|
Performance: 0.342 ns/day, 70.097 hours/ns, 7.926 timesteps/s, 23.777 katom-step/s
|
||||||
|
99.0% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||||
|
|
||||||
|
MPI task timing breakdown:
|
||||||
|
Section | min time | avg time | max time |%varavg| %total
|
||||||
|
---------------------------------------------------------------
|
||||||
|
Pair | 1.7081 | 1.7518 | 1.7812 | 2.3 | 69.42
|
||||||
|
Neigh | 0.10017 | 0.10116 | 0.10315 | 0.4 | 4.01
|
||||||
|
Comm | 0.014848 | 0.044256 | 0.087941 | 14.7 | 1.75
|
||||||
|
Output | 5.1199e-05 | 5.663e-05 | 7.1837e-05 | 0.0 | 0.00
|
||||||
|
Modify | 0.62379 | 0.62575 | 0.62671 | 0.1 | 24.80
|
||||||
|
Other | | 0.000504 | | | 0.02
|
||||||
|
|
||||||
|
Nlocal: 750 ave 759 max 735 min
|
||||||
|
Histogram: 1 0 0 0 0 1 0 0 0 2
|
||||||
|
Nghost: 6230.5 ave 6256 max 6190 min
|
||||||
|
Histogram: 1 0 0 0 0 1 0 0 1 1
|
||||||
|
Neighs: 277008 ave 280943 max 271394 min
|
||||||
|
Histogram: 1 0 0 0 0 1 0 0 1 1
|
||||||
|
|
||||||
|
Total # of neighbors = 1108032
|
||||||
|
Ave neighs/atom = 369.344
|
||||||
|
Neighbor list builds = 2
|
||||||
|
Dangerous builds = 0
|
||||||
|
Total wall time: 0:00:02
|
||||||
@ -1,5 +1,72 @@
|
|||||||
# CHANGELOG
|
# CHANGELOG
|
||||||
|
|
||||||
|
## 4.6.00
|
||||||
|
|
||||||
|
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.01...4.6.00)
|
||||||
|
|
||||||
|
### Features:
|
||||||
|
|
||||||
|
* Kokkos::Graph: Allow adding tasks to the graph via a `then`-node [\#7629](https://github.com/kokkos/kokkos/pull/7629)
|
||||||
|
* Kokkos::Graph: Allow construction from CUDA/HIP graph [\#7664](https://github.com/kokkos/kokkos/pull/7664)
|
||||||
|
* HIP: Add experimental support for using multiple GPUs from one process [\#7130](https://github.com/kokkos/kokkos/pull/7130)
|
||||||
|
|
||||||
|
### Backend and Architecture Enhancements:
|
||||||
|
|
||||||
|
#### CUDA:
|
||||||
|
* Improved reduction performance, in particular on H100 and newer [\#7823](https://github.com/kokkos/kokkos/pull/7823)
|
||||||
|
|
||||||
|
#### HIP:
|
||||||
|
* Change block size deduction to prefer smaller blocks/teams [\#7509](https://github.com/kokkos/kokkos/pull/7509)
|
||||||
|
* Allocate memory with stream ordered semantics (i.e. use `hipMallocAsync`) [\#7659](https://github.com/kokkos/kokkos/pull/7659)
|
||||||
|
* Fix a segfault when a virtual function called inside a kernel requires too many registers [\#7660](https://github.com/kokkos/kokkos/pull/7660)
|
||||||
|
|
||||||
|
#### SYCL:
|
||||||
|
* Improve sorting performance for non-contiguous views [\#7502](https://github.com/kokkos/kokkos/pull/7502)
|
||||||
|
|
||||||
|
#### Serial:
|
||||||
|
* Reduce fences overhead when using `Kokkos_ENABLE_ATOMICS_BYPASS` [\#7821](https://github.com/kokkos/kokkos/pull/7821)
|
||||||
|
|
||||||
|
### General Enhancements
|
||||||
|
* Allow use of `kokkos_check` in `<PackageName>Config.cmake` without warnings [\#7669](https://github.com/kokkos/kokkos/pull/7669)
|
||||||
|
* Add simd compound assignments and update simd reductions [\#7486](https://github.com/kokkos/kokkos/pull/7486)
|
||||||
|
* Improve performance of the `inclusive_scan` algorithm with Cuda and HIP [\#7542](https://github.com/kokkos/kokkos/pull/7542)
|
||||||
|
* Reduce tooling interface overhead (don't pay for what you don't use) [\#7817](https://github.com/kokkos/kokkos/pull/7817)
|
||||||
|
* Avoid storing the view in `RandomAccessIterator` to increase performance [\#7304](https://github.com/kokkos/kokkos/pull/7304)
|
||||||
|
* Make `RandomAccessIterator` fulfill the `std::random_access_iterator` concept [\#7451](https://github.com/kokkos/kokkos/pull/7451)
|
||||||
|
* Include information about support for system allocated memory in `print_configuration` (Cuda and HIP) [\#7673](https://github.com/kokkos/kokkos/pull/7673)
|
||||||
|
|
||||||
|
### Build System Changes
|
||||||
|
* Add support for Zen 4 AMD microarchitecture [\#7550](https://github.com/kokkos/kokkos/pull/7550)
|
||||||
|
* Enable NVIDIA Grace architecture with NVHPC [\#7858](https://github.com/kokkos/kokkos/pull/7858)
|
||||||
|
* Support static library builds when using CUDA as CMake language [\#7830](https://github.com/kokkos/kokkos/pull/7830)
|
||||||
|
|
||||||
|
### Incompatibilities (i.e. breaking changes)
|
||||||
|
* Change SIMD comparison operator to return `simd_mask` instead of `bool` [\#7781](https://github.com/kokkos/kokkos/pull/7781)
|
||||||
|
* Remove classic Intel compiler (icpc) support [\#7737](https://github.com/kokkos/kokkos/pull/7737)
|
||||||
|
* Remove `operator[]` overloads of Kokkos `basic_simd` and `basic_simd_mask` that return a reference [\#7630](https://github.com/kokkos/kokkos/pull/7630)
|
||||||
|
|
||||||
|
### Deprecations
|
||||||
|
* Deprecate `StaticCrsGraph` and move it to Kokkos Kernels into `KokkosSparse::` [\#7516](https://github.com/kokkos/kokkos/pull/7516)
|
||||||
|
* Deprecate `native_simd` and hide `simd_abi` [\#7472](https://github.com/kokkos/kokkos/pull/7472)
|
||||||
|
* Deprecate Makefile support [\#7613](https://github.com/kokkos/kokkos/pull/7613)
|
||||||
|
* DualView: Deprecate direct access to d_view and h_view [\#7716](https://github.com/kokkos/kokkos/pull/7716)
|
||||||
|
|
||||||
|
### Bug Fixes
|
||||||
|
* Fix performance bug affecting `atomic_fetch_{add,sub,min,max,and,or,xor}` on integral types `long` and `unsigned long` with HIP [\#7816](https://github.com/kokkos/kokkos/pull/7816)
|
||||||
|
* Fix execution of ranges with more than 2B elements [\#7797](https://github.com/kokkos/kokkos/pull/7797)
|
||||||
|
* Fix clean target when embedding Kokkos in another project [\#7557](https://github.com/kokkos/kokkos/pull/7557)
|
||||||
|
* Fix Zen3 flag for NVHPC [\#7558](https://github.com/kokkos/kokkos/pull/7558)
|
||||||
|
* graph: nodes must be stored by the graph [\#7619](https://github.com/kokkos/kokkos/pull/7619)
|
||||||
|
* Make sure lock arrays are on device before launching a graph [\#7685](https://github.com/kokkos/kokkos/pull/7685)
|
||||||
|
* Performance bug in `RangePolicy`: construct error message if and only if the precondition is violated [\#7809](https://github.com/kokkos/kokkos/pull/7809)
|
||||||
|
* simd: fix a bug in scalar min/max [\#7813](https://github.com/kokkos/kokkos/pull/7813)
|
||||||
|
* simd: fix a bug in non-masked reductions [\#7845](https://github.com/kokkos/kokkos/pull/7845)
|
||||||
|
* Cuda: fix incorrect iteration in `MDRangePolicy` of rank > 4 for high iteration counts [\#7724](https://github.com/kokkos/kokkos/pull/7724)
|
||||||
|
* Cuda: ignore gcc assembler options in `nvcc-wrapper` [\#7492](https://github.com/kokkos/kokkos/pull/7492)
|
||||||
|
* Build system: hint to `ARCH_NATIVE` if ARMv9 Grace arch is not explicitly supported by the compiler [\#7862](https://github.com/kokkos/kokkos/pull/7862)
|
||||||
|
* Use right arch for MI300A in makefiles [\#7786](https://github.com/kokkos/kokkos/pull/7786)
|
||||||
|
* Fix compiling BasicView on MSVC [\#7751](https://github.com/kokkos/kokkos/pull/7751)
|
||||||
|
|
||||||
## 4.5.01
|
## 4.5.01
|
||||||
|
|
||||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.00...4.5.01)
|
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.00...4.5.01)
|
||||||
|
|||||||
@ -148,8 +148,8 @@ elseif(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(Kokkos_VERSION_MAJOR 4)
|
set(Kokkos_VERSION_MAJOR 4)
|
||||||
set(Kokkos_VERSION_MINOR 5)
|
set(Kokkos_VERSION_MINOR 6)
|
||||||
set(Kokkos_VERSION_PATCH 1)
|
set(Kokkos_VERSION_PATCH 0)
|
||||||
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
|
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
|
||||||
message(STATUS "Kokkos version: ${Kokkos_VERSION}")
|
message(STATUS "Kokkos version: ${Kokkos_VERSION}")
|
||||||
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
|
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
|
||||||
|
|||||||
4
lib/kokkos/CTestConfig.cmake
Normal file
4
lib/kokkos/CTestConfig.cmake
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
set(CTEST_PROJECT_NAME Kokkos)
|
||||||
|
set(CTEST_NIGHTLY_START_TIME 01:00:00 UTC)
|
||||||
|
set(CTEST_SUBMIT_URL https://my.cdash.org/submit.php?project=Kokkos)
|
||||||
|
set(CTEST_DROP_SITE_CDASH TRUE)
|
||||||
@ -1,18 +1,26 @@
|
|||||||
# Default settings common options.
|
# Default settings common options.
|
||||||
|
|
||||||
#SPARTA specific settings:
|
#LAMMPS specific settings:
|
||||||
|
|
||||||
|
KOKKOS_USE_DEPRECATED_MAKEFILES=1
|
||||||
|
|
||||||
ifndef KOKKOS_PATH
|
ifndef KOKKOS_PATH
|
||||||
KOKKOS_PATH=../../lib/kokkos
|
KOKKOS_PATH=../../lib/kokkos
|
||||||
endif
|
endif
|
||||||
|
|
||||||
CXXFLAGS=$(CCFLAGS)
|
CXXFLAGS=$(CCFLAGS)
|
||||||
ifeq ($(mode),shared)
|
ifeq ($(mode),shared)
|
||||||
CXXFLAGS += $(SHFLAGS)
|
CXXFLAGS += $(SHFLAGS)
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
ifneq ($(KOKKOS_USE_DEPRECATED_MAKEFILES), 1)
|
||||||
|
$(error Makefile support is deprecated. Only CMake builds will be supported from Kokkos 5 on. Set KOKKOS_USE_DEPRECATED_MAKEFILES=1 to silence this error.)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
KOKKOS_VERSION_MAJOR = 4
|
KOKKOS_VERSION_MAJOR = 4
|
||||||
KOKKOS_VERSION_MINOR = 5
|
KOKKOS_VERSION_MINOR = 6
|
||||||
KOKKOS_VERSION_PATCH = 1
|
KOKKOS_VERSION_PATCH = 0
|
||||||
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
|
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
|
||||||
|
|
||||||
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
|
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
|
||||||
@ -24,7 +32,7 @@ KOKKOS_DEVICES ?= "OpenMP"
|
|||||||
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
|
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
|
||||||
# IBM: Power8,Power9
|
# IBM: Power8,Power9
|
||||||
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX942_APU,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
|
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX942_APU,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
|
||||||
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
|
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3,Zen4
|
||||||
# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC
|
# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC
|
||||||
KOKKOS_ARCH ?= ""
|
KOKKOS_ARCH ?= ""
|
||||||
# Options: yes,no
|
# Options: yes,no
|
||||||
@ -442,12 +450,15 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
|
|||||||
|
|
||||||
# AMD based.
|
# AMD based.
|
||||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
|
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_ZEN4 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen4)
|
||||||
KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3)
|
KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3)
|
||||||
KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
|
KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2)
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0)
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN4), 0)
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0)
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 0)
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 0)
|
||||||
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
|
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906)
|
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906)
|
||||||
@ -463,8 +474,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0)
|
|||||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A)
|
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A)
|
||||||
endif
|
endif
|
||||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
|
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
|
||||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
|
|
||||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942_APU)
|
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942_APU)
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU), 0)
|
||||||
|
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
|
||||||
|
endif
|
||||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)
|
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0)
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0)
|
||||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030)
|
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030)
|
||||||
@ -857,6 +870,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1)
|
|||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN4), 1)
|
||||||
|
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN4")
|
||||||
|
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON")
|
||||||
|
|
||||||
|
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||||
|
KOKKOS_CXXFLAGS += -xCORE-AVX512
|
||||||
|
KOKKOS_LDFLAGS += -xCORE-AVX512
|
||||||
|
else
|
||||||
|
KOKKOS_CXXFLAGS += -march=znver4 -mtune=znver4
|
||||||
|
KOKKOS_LDFLAGS += -march=znver4 -mtune=znver4
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
|
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
|
||||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
|
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80")
|
||||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX")
|
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX")
|
||||||
|
|||||||
@ -18,24 +18,24 @@ Kokkos is a [Linux Foundation](https://linuxfoundation.org) project.
|
|||||||
|
|
||||||
To start learning about Kokkos:
|
To start learning about Kokkos:
|
||||||
|
|
||||||
- [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities.
|
- [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/tutorials-and-examples/video-lectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities.
|
||||||
|
|
||||||
- [Programming guide](https://kokkos.org/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch.
|
- [Programming guide](https://kokkos.org/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch.
|
||||||
|
|
||||||
- [API reference](https://kokkos.org/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.org/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.org/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.org/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.org/kokkos-core-wiki/API/alphabetical.html).
|
- [API reference](https://kokkos.org/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.org/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.org/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.org/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.org/kokkos-core-wiki/API/alphabetical.html).
|
||||||
|
|
||||||
- [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/usecases.html): a serie of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
|
- [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/tutorials-and-examples/use-cases-and-examples.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
|
||||||
|
|
||||||
## Obtaining Kokkos
|
## Obtaining Kokkos
|
||||||
|
|
||||||
The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest).
|
The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest).
|
||||||
|
|
||||||
The current release is [4.5.01](https://github.com/kokkos/kokkos/releases/tag/4.5.01).
|
The current release is [4.6.00](https://github.com/kokkos/kokkos/releases/tag/4.6.00).
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz
|
curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.6.00/kokkos-4.6.00.tar.gz
|
||||||
# Or with wget
|
# Or with wget
|
||||||
wget https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz
|
wget https://github.com/kokkos/kokkos/releases/download/4.6.00/kokkos-4.6.00.tar.gz
|
||||||
```
|
```
|
||||||
|
|
||||||
To clone the latest development version of Kokkos from GitHub:
|
To clone the latest development version of Kokkos from GitHub:
|
||||||
@ -47,7 +47,7 @@ git clone -b develop https://github.com/kokkos/kokkos.git
|
|||||||
### Building Kokkos
|
### Building Kokkos
|
||||||
|
|
||||||
To build Kokkos, you will need to have a C++ compiler that supports C++17 or later.
|
To build Kokkos, you will need to have a C++ compiler that supports C++17 or later.
|
||||||
All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/requirements.html).
|
All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/get-started/requirements.html).
|
||||||
|
|
||||||
Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/building.html).
|
Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/building.html).
|
||||||
|
|
||||||
|
|||||||
@ -5,3 +5,7 @@ endif()
|
|||||||
if(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC))
|
if(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC))
|
||||||
kokkos_add_test_directories(unit_tests)
|
kokkos_add_test_directories(unit_tests)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(Kokkos_ENABLE_BENCHMARKS)
|
||||||
|
add_subdirectory(perf_test)
|
||||||
|
endif()
|
||||||
|
|||||||
63
lib/kokkos/algorithms/perf_test/CMakeLists.txt
Normal file
63
lib/kokkos/algorithms/perf_test/CMakeLists.txt
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
# FIXME: The following logic should be moved from here and also from `core/perf_test/CMakeLists.txt` to
|
||||||
|
# the root `CMakeLists.txt` in the form of a macro
|
||||||
|
# Find or download google/benchmark library
|
||||||
|
find_package(benchmark QUIET 1.5.6)
|
||||||
|
if(benchmark_FOUND)
|
||||||
|
message(STATUS "Using google benchmark found in ${benchmark_DIR}")
|
||||||
|
else()
|
||||||
|
message(STATUS "No installed google benchmark found, fetching from GitHub")
|
||||||
|
include(FetchContent)
|
||||||
|
set(BENCHMARK_ENABLE_TESTING OFF)
|
||||||
|
|
||||||
|
list(APPEND CMAKE_MESSAGE_INDENT "[benchmark] ")
|
||||||
|
FetchContent_Declare(
|
||||||
|
googlebenchmark
|
||||||
|
DOWNLOAD_EXTRACT_TIMESTAMP FALSE
|
||||||
|
URL https://github.com/google/benchmark/archive/refs/tags/v1.7.1.tar.gz
|
||||||
|
URL_HASH MD5=0459a6c530df9851bee6504c3e37c2e7
|
||||||
|
)
|
||||||
|
FetchContent_MakeAvailable(googlebenchmark)
|
||||||
|
list(POP_BACK CMAKE_MESSAGE_INDENT)
|
||||||
|
|
||||||
|
# Suppress clang-tidy diagnostics on code that we do not have control over
|
||||||
|
if(CMAKE_CXX_CLANG_TIDY)
|
||||||
|
set_target_properties(benchmark PROPERTIES CXX_CLANG_TIDY "")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# FIXME: Check whether the following target_compile_options are needed.
|
||||||
|
# If so, clarify why.
|
||||||
|
target_compile_options(benchmark PRIVATE -w)
|
||||||
|
target_compile_options(benchmark_main PRIVATE -w)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# FIXME: This function should be moved from here and also from `core/perf_test/CMakeLists.txt` to
|
||||||
|
# the root `CMakeLists.txt`
|
||||||
|
# FIXME: Could NAME be a one_value_keyword specified in cmake_parse_arguments?
|
||||||
|
function(KOKKOS_ADD_BENCHMARK NAME)
|
||||||
|
cmake_parse_arguments(BENCHMARK "" "" "SOURCES" ${ARGN})
|
||||||
|
if(DEFINED BENCHMARK_UNPARSED_ARGUMENTS)
|
||||||
|
message(WARNING "Unexpected arguments when adding a benchmark: " ${BENCHMARK_UNPARSED_ARGUMENTS})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set(BENCHMARK_NAME Kokkos_${NAME})
|
||||||
|
# FIXME: BenchmarkMain.cpp and Benchmark_Context.cpp should be moved to a common location from which
|
||||||
|
# they can be used by all performance tests.
|
||||||
|
list(APPEND BENCHMARK_SOURCES ../../core/perf_test/BenchmarkMain.cpp ../../core/perf_test/Benchmark_Context.cpp)
|
||||||
|
|
||||||
|
add_executable(${BENCHMARK_NAME} ${BENCHMARK_SOURCES})
|
||||||
|
target_link_libraries(${BENCHMARK_NAME} PRIVATE benchmark::benchmark Kokkos::kokkos impl_git_version)
|
||||||
|
target_include_directories(${BENCHMARK_NAME} SYSTEM PRIVATE ${benchmark_SOURCE_DIR}/include)
|
||||||
|
|
||||||
|
# FIXME: This alone will not work. It might need an architecture and standard which need to be defined on target level.
|
||||||
|
# It will potentially go away with #7582.
|
||||||
|
foreach(SOURCE_FILE ${BENCHMARK_SOURCES})
|
||||||
|
set_source_files_properties(${SOURCE_FILE} PROPERTIES LANGUAGE ${KOKKOS_COMPILE_LANGUAGE})
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
string(TIMESTAMP BENCHMARK_TIME "%Y-%m-%d_T%H-%M-%S" UTC)
|
||||||
|
set(BENCHMARK_ARGS --benchmark_counters_tabular=true --benchmark_out=${BENCHMARK_NAME}_${BENCHMARK_TIME}.json)
|
||||||
|
|
||||||
|
add_test(NAME ${BENCHMARK_NAME} COMMAND ${BENCHMARK_NAME} ${BENCHMARK_ARGS})
|
||||||
|
endfunction()
|
||||||
|
|
||||||
|
kokkos_add_benchmark(PerformanceTest_InclusiveScan SOURCES test_inclusive_scan.cpp)
|
||||||
191
lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp
Normal file
191
lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp
Normal file
@ -0,0 +1,191 @@
|
|||||||
|
//@HEADER
|
||||||
|
// ************************************************************************
|
||||||
|
//
|
||||||
|
// Kokkos v. 4.0
|
||||||
|
// Copyright (2022) National Technology & Engineering
|
||||||
|
// Solutions of Sandia, LLC (NTESS).
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://kokkos.org/LICENSE for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//@HEADER
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <tuple>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
#include <benchmark/benchmark.h>
|
||||||
|
|
||||||
|
#include <Kokkos_Core.hpp>
|
||||||
|
#include <Kokkos_Timer.hpp>
|
||||||
|
#include <Kokkos_StdAlgorithms.hpp>
|
||||||
|
// FIXME: Benchmark_Context.hpp should be moved to a common location
|
||||||
|
#include "../../core/perf_test/Benchmark_Context.hpp"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
namespace KE = Kokkos::Experimental;
|
||||||
|
|
||||||
|
using ExecSpace = Kokkos::DefaultExecutionSpace;
|
||||||
|
using HostExecSpace = Kokkos::DefaultHostExecutionSpace;
|
||||||
|
|
||||||
|
// A tag struct to identify when inclusive scan with the implicit sum
|
||||||
|
// based binary operation needs to be called.
|
||||||
|
template <class ValueType>
|
||||||
|
struct ImpSumBinOp;
|
||||||
|
|
||||||
|
template <class ValueType>
|
||||||
|
struct SumFunctor {
|
||||||
|
KOKKOS_FUNCTION
|
||||||
|
ValueType operator()(const ValueType& a, const ValueType& b) const {
|
||||||
|
return (a + b);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class ValueType>
|
||||||
|
struct MaxFunctor {
|
||||||
|
KOKKOS_FUNCTION
|
||||||
|
ValueType operator()(const ValueType& a, const ValueType& b) const {
|
||||||
|
if (a > b)
|
||||||
|
return a;
|
||||||
|
else
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Helper to obtain last element of a view
|
||||||
|
template <class T>
|
||||||
|
T obtain_last_elem(const Kokkos::View<T*, ExecSpace>& v) {
|
||||||
|
T last_element;
|
||||||
|
Kokkos::deep_copy(last_element, Kokkos::subview(v, v.extent(0) - 1));
|
||||||
|
return last_element;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper to allocate input and output views
|
||||||
|
template <class T>
|
||||||
|
auto prepare_views(const std::size_t kProbSize) {
|
||||||
|
Kokkos::View<T*, ExecSpace> in{"input", kProbSize};
|
||||||
|
Kokkos::View<T*, ExecSpace> out{"output", kProbSize};
|
||||||
|
|
||||||
|
auto h_in = Kokkos::create_mirror_view(in);
|
||||||
|
|
||||||
|
for (std::size_t i = 0; i < kProbSize; ++i) {
|
||||||
|
h_in(i) = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
Kokkos::deep_copy(in, h_in);
|
||||||
|
|
||||||
|
return std::make_tuple(in, out, h_in);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Perform scan with a reference implementation
|
||||||
|
template <class T, class ViewType, class ScanFunctor = SumFunctor<T>>
|
||||||
|
T ref_scan(const ViewType& h_in, ScanFunctor scan_functor = ScanFunctor()) {
|
||||||
|
std::size_t view_size = h_in.extent(0);
|
||||||
|
|
||||||
|
Kokkos::View<T*, HostExecSpace> h_out("output", view_size);
|
||||||
|
|
||||||
|
// FIXME: We have GCC 8.4.0 based check in our ORNL Jenkins CI.
|
||||||
|
// std::inclusive_scan is available only from GCC 9.3. Since GCC 9.1,
|
||||||
|
// the std::inclusive_scan overload that takes an execution policy is available. However,
|
||||||
|
// there is an error with the <execution> header before GCC 10.1.
|
||||||
|
h_out(0) = h_in(0);
|
||||||
|
|
||||||
|
for (std::size_t i = 1; i < view_size; ++i) {
|
||||||
|
h_out(i) = scan_functor(h_in(i), h_out(i - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
return h_out(view_size - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inclusive Scan with default binary operation (sum) or user provided functor
|
||||||
|
// Note: The nature of the functor must be compatible with the
|
||||||
|
// elements in the input and output views
|
||||||
|
template <class T, template <class> class ScanFunctor = ImpSumBinOp>
|
||||||
|
auto inclusive_scan(const Kokkos::View<T*, ExecSpace>& in,
|
||||||
|
const Kokkos::View<T*, ExecSpace>& out, T res_check) {
|
||||||
|
ExecSpace().fence();
|
||||||
|
Kokkos::Timer timer;
|
||||||
|
|
||||||
|
if constexpr (std::is_same_v<ScanFunctor<T>, ImpSumBinOp<T>>) {
|
||||||
|
KE::inclusive_scan("Default scan", ExecSpace(), KE::cbegin(in),
|
||||||
|
KE::cend(in), KE::begin(out));
|
||||||
|
} else {
|
||||||
|
KE::inclusive_scan("Scan using a functor", ExecSpace(), KE::cbegin(in),
|
||||||
|
KE::cend(in), KE::begin(out), ScanFunctor<T>());
|
||||||
|
}
|
||||||
|
|
||||||
|
ExecSpace().fence();
|
||||||
|
double time_scan = timer.seconds();
|
||||||
|
|
||||||
|
T res_scan = obtain_last_elem(out);
|
||||||
|
bool passed = (res_check == res_scan);
|
||||||
|
|
||||||
|
return std::make_tuple(time_scan, passed);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Benchmark: Inclusive Scan with default binary operation (sum)
|
||||||
|
// or user provided functor
|
||||||
|
template <class T, template <class> class ScanFunctor = ImpSumBinOp>
|
||||||
|
void BM_inclusive_scan(benchmark::State& state) {
|
||||||
|
const std::size_t kProbSize = state.range(0);
|
||||||
|
|
||||||
|
auto [in, out, h_in] = prepare_views<T>(kProbSize);
|
||||||
|
|
||||||
|
T res_check;
|
||||||
|
|
||||||
|
if constexpr (std::is_same_v<ScanFunctor<T>, ImpSumBinOp<T>>) {
|
||||||
|
res_check = ref_scan<T>(h_in);
|
||||||
|
} else {
|
||||||
|
res_check = ref_scan<T>(h_in, ScanFunctor<T>());
|
||||||
|
}
|
||||||
|
|
||||||
|
double time_scan = 0.;
|
||||||
|
bool passed = false;
|
||||||
|
|
||||||
|
for (auto _ : state) {
|
||||||
|
if constexpr (std::is_same_v<ScanFunctor<T>, ImpSumBinOp<T>>) {
|
||||||
|
std::tie(time_scan, passed) = inclusive_scan<T>(in, out, res_check);
|
||||||
|
} else {
|
||||||
|
std::tie(time_scan, passed) =
|
||||||
|
inclusive_scan<T, ScanFunctor>(in, out, res_check);
|
||||||
|
}
|
||||||
|
|
||||||
|
KokkosBenchmark::report_results(state, in, 2, time_scan);
|
||||||
|
state.counters["Passed"] = passed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr std::size_t PROB_SIZE = 100'000'000;
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
// FIXME: Add logic to pass min. warm-up time. Also, the value should be set
|
||||||
|
// by the user. Say, via the environment variable BENCHMARK_MIN_WARMUP_TIME.
|
||||||
|
|
||||||
|
BENCHMARK(BM_inclusive_scan<std::uint64_t>)->Arg(PROB_SIZE)->UseManualTime();
|
||||||
|
BENCHMARK(BM_inclusive_scan<std::int64_t>)->Arg(PROB_SIZE)->UseManualTime();
|
||||||
|
BENCHMARK(BM_inclusive_scan<double>)->Arg(PROB_SIZE)->UseManualTime();
|
||||||
|
BENCHMARK(BM_inclusive_scan<std::uint64_t, SumFunctor>)
|
||||||
|
->Arg(PROB_SIZE)
|
||||||
|
->UseManualTime();
|
||||||
|
BENCHMARK(BM_inclusive_scan<std::int64_t, SumFunctor>)
|
||||||
|
->Arg(PROB_SIZE)
|
||||||
|
->UseManualTime();
|
||||||
|
BENCHMARK(BM_inclusive_scan<double, SumFunctor>)
|
||||||
|
->Arg(PROB_SIZE)
|
||||||
|
->UseManualTime();
|
||||||
|
BENCHMARK(BM_inclusive_scan<std::uint64_t, MaxFunctor>)
|
||||||
|
->Arg(PROB_SIZE)
|
||||||
|
->UseManualTime();
|
||||||
|
BENCHMARK(BM_inclusive_scan<std::int64_t, MaxFunctor>)
|
||||||
|
->Arg(PROB_SIZE)
|
||||||
|
->UseManualTime();
|
||||||
|
BENCHMARK(BM_inclusive_scan<double, MaxFunctor>)
|
||||||
|
->Arg(PROB_SIZE)
|
||||||
|
->UseManualTime();
|
||||||
@ -587,11 +587,13 @@ struct Random_XorShift1024_State<false> {
|
|||||||
int state_idx)
|
int state_idx)
|
||||||
: state_(&v(state_idx, 0)), stride_(v.stride_1()) {}
|
: state_(&v(state_idx, 0)), stride_(v.stride_1()) {}
|
||||||
|
|
||||||
|
// NOLINTBEGIN(bugprone-implicit-widening-of-multiplication-result)
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
uint64_t operator[](const int i) const { return state_[i * stride_]; }
|
uint64_t operator[](const int i) const { return state_[i * stride_]; }
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
uint64_t& operator[](const int i) { return state_[i * stride_]; }
|
uint64_t& operator[](const int i) { return state_[i * stride_]; }
|
||||||
|
// NOLINTEND(bugprone-implicit-widening-of-multiplication-result)
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class ExecutionSpace>
|
template <class ExecutionSpace>
|
||||||
@ -670,7 +672,12 @@ struct Random_UniqueIndex<Kokkos::Device<Kokkos::SYCL, MemorySpace>> {
|
|||||||
View<int**, Kokkos::Device<Kokkos::SYCL, MemorySpace>>;
|
View<int**, Kokkos::Device<Kokkos::SYCL, MemorySpace>>;
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
static int get_state_idx(const locks_view_type& locks_) {
|
static int get_state_idx(const locks_view_type& locks_) {
|
||||||
|
#if defined(KOKKOS_COMPILER_INTEL_LLVM) && \
|
||||||
|
KOKKOS_COMPILER_INTEL_LLVM >= 20250000
|
||||||
|
auto item = sycl::ext::oneapi::this_work_item::get_nd_item<3>();
|
||||||
|
#else
|
||||||
auto item = sycl::ext::oneapi::experimental::this_nd_item<3>();
|
auto item = sycl::ext::oneapi::experimental::this_nd_item<3>();
|
||||||
|
#endif
|
||||||
std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1),
|
std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1),
|
||||||
item.get_local_id(0)};
|
item.get_local_id(0)};
|
||||||
std::size_t blockIdx[3] = {item.get_group(2), item.get_group(1),
|
std::size_t blockIdx[3] = {item.get_group(2), item.get_group(1),
|
||||||
|
|||||||
@ -45,7 +45,7 @@ struct BinOp1D {
|
|||||||
// For integral types the number of bins may be larger than the range
|
// For integral types the number of bins may be larger than the range
|
||||||
// in which case we can exactly have one unique value per bin
|
// in which case we can exactly have one unique value per bin
|
||||||
// and then don't need to sort bins.
|
// and then don't need to sort bins.
|
||||||
if (std::is_integral<typename KeyViewType::const_value_type>::value &&
|
if (std::is_integral_v<typename KeyViewType::const_value_type> &&
|
||||||
(static_cast<double>(max) - static_cast<double>(min)) <=
|
(static_cast<double>(max) - static_cast<double>(min)) <=
|
||||||
static_cast<double>(max_bins)) {
|
static_cast<double>(max_bins)) {
|
||||||
mul_ = 1.;
|
mul_ = 1.;
|
||||||
|
|||||||
@ -53,13 +53,9 @@ void sort(const ExecutionSpace& exec,
|
|||||||
|
|
||||||
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
|
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
|
||||||
exec.fence("Kokkos::sort without comparator use std::sort");
|
exec.fence("Kokkos::sort without comparator use std::sort");
|
||||||
if (view.span_is_contiguous()) {
|
|
||||||
std::sort(view.data(), view.data() + view.size());
|
|
||||||
} else {
|
|
||||||
auto first = ::Kokkos::Experimental::begin(view);
|
auto first = ::Kokkos::Experimental::begin(view);
|
||||||
auto last = ::Kokkos::Experimental::end(view);
|
auto last = ::Kokkos::Experimental::end(view);
|
||||||
std::sort(first, last);
|
std::sort(first, last);
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
Impl::sort_device_view_without_comparator(exec, view);
|
Impl::sort_device_view_without_comparator(exec, view);
|
||||||
}
|
}
|
||||||
@ -111,13 +107,9 @@ void sort(const ExecutionSpace& exec,
|
|||||||
|
|
||||||
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
|
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
|
||||||
exec.fence("Kokkos::sort with comparator use std::sort");
|
exec.fence("Kokkos::sort with comparator use std::sort");
|
||||||
if (view.span_is_contiguous()) {
|
|
||||||
std::sort(view.data(), view.data() + view.size(), comparator);
|
|
||||||
} else {
|
|
||||||
auto first = ::Kokkos::Experimental::begin(view);
|
auto first = ::Kokkos::Experimental::begin(view);
|
||||||
auto last = ::Kokkos::Experimental::end(view);
|
auto last = ::Kokkos::Experimental::end(view);
|
||||||
std::sort(first, last, comparator);
|
std::sort(first, last, comparator);
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
Impl::sort_device_view_with_comparator(exec, view, comparator);
|
Impl::sort_device_view_with_comparator(exec, view, comparator);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -47,6 +47,7 @@
|
|||||||
#ifdef _CubLog
|
#ifdef _CubLog
|
||||||
#undef _CubLog
|
#undef _CubLog
|
||||||
#endif
|
#endif
|
||||||
|
// NOLINTNEXTLINE(bugprone-reserved-identifier)
|
||||||
#define _CubLog
|
#define _CubLog
|
||||||
#include <thrust/device_ptr.h>
|
#include <thrust/device_ptr.h>
|
||||||
#include <thrust/sort.h>
|
#include <thrust/sort.h>
|
||||||
@ -65,12 +66,24 @@
|
|||||||
#include <thrust/sort.h>
|
#include <thrust/sort.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(KOKKOS_ENABLE_ONEDPL) && \
|
#ifdef KOKKOS_ENABLE_ONEDPL
|
||||||
(ONEDPL_VERSION_MAJOR > 2022 || \
|
#define KOKKOS_IMPL_ONEDPL_VERSION \
|
||||||
(ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2))
|
ONEDPL_VERSION_MAJOR * 10000 + ONEDPL_VERSION_MINOR * 100 + \
|
||||||
#define KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
ONEDPL_VERSION_PATCH
|
||||||
|
#define KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(MAJOR, MINOR, PATCH) \
|
||||||
|
(KOKKOS_IMPL_ONEDPL_VERSION >= ((MAJOR)*10000 + (MINOR)*100 + (PATCH)))
|
||||||
|
|
||||||
|
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 2, 0)
|
||||||
|
#define KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wshadow"
|
||||||
|
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
|
||||||
|
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||||
|
#pragma GCC diagnostic ignored "-Wunused-variable"
|
||||||
#include <oneapi/dpl/execution>
|
#include <oneapi/dpl/execution>
|
||||||
#include <oneapi/dpl/algorithm>
|
#include <oneapi/dpl/algorithm>
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Kokkos::Impl {
|
namespace Kokkos::Impl {
|
||||||
@ -141,12 +154,18 @@ void sort_by_key_rocthrust(
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||||
|
|
||||||
|
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||||
|
template <class Layout>
|
||||||
|
inline constexpr bool sort_on_device_v<Kokkos::SYCL, Layout> = true;
|
||||||
|
#else
|
||||||
template <class Layout>
|
template <class Layout>
|
||||||
inline constexpr bool sort_on_device_v<Kokkos::SYCL, Layout> =
|
inline constexpr bool sort_on_device_v<Kokkos::SYCL, Layout> =
|
||||||
std::is_same_v<Layout, Kokkos::LayoutLeft> ||
|
std::is_same_v<Layout, Kokkos::LayoutLeft> ||
|
||||||
std::is_same_v<Layout, Kokkos::LayoutRight>;
|
std::is_same_v<Layout, Kokkos::LayoutRight>;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
|
||||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||||
class... ValuesProperties, class... MaybeComparator>
|
class... ValuesProperties, class... MaybeComparator>
|
||||||
void sort_by_key_onedpl(
|
void sort_by_key_onedpl(
|
||||||
@ -154,6 +173,14 @@ void sort_by_key_onedpl(
|
|||||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||||
MaybeComparator&&... maybeComparator) {
|
MaybeComparator&&... maybeComparator) {
|
||||||
|
auto queue = exec.sycl_queue();
|
||||||
|
auto policy = oneapi::dpl::execution::make_device_policy(queue);
|
||||||
|
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||||
|
oneapi::dpl::sort_by_key(policy, ::Kokkos::Experimental::begin(keys),
|
||||||
|
::Kokkos::Experimental::end(keys),
|
||||||
|
::Kokkos::Experimental::begin(values),
|
||||||
|
std::forward<MaybeComparator>(maybeComparator)...);
|
||||||
|
#else
|
||||||
if (keys.stride(0) != 1 && values.stride(0) != 1) {
|
if (keys.stride(0) != 1 && values.stride(0) != 1) {
|
||||||
Kokkos::abort(
|
Kokkos::abort(
|
||||||
"SYCL sort_by_key only supports rank-1 Views with stride(0) = 1.");
|
"SYCL sort_by_key only supports rank-1 Views with stride(0) = 1.");
|
||||||
@ -161,11 +188,10 @@ void sort_by_key_onedpl(
|
|||||||
|
|
||||||
// Can't use Experimental::begin/end here since the oneDPL then assumes that
|
// Can't use Experimental::begin/end here since the oneDPL then assumes that
|
||||||
// the data is on the host.
|
// the data is on the host.
|
||||||
auto queue = exec.sycl_queue();
|
|
||||||
auto policy = oneapi::dpl::execution::make_device_policy(queue);
|
|
||||||
const int n = keys.extent(0);
|
const int n = keys.extent(0);
|
||||||
oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(),
|
oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(),
|
||||||
std::forward<MaybeComparator>(maybeComparator)...);
|
std::forward<MaybeComparator>(maybeComparator)...);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
@ -336,12 +362,18 @@ void sort_by_key_device_view_without_comparator(
|
|||||||
const Kokkos::SYCL& exec,
|
const Kokkos::SYCL& exec,
|
||||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
||||||
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
|
||||||
|
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||||
|
sort_by_key_onedpl(exec, keys, values);
|
||||||
|
#else
|
||||||
if (keys.stride(0) == 1 && values.stride(0) == 1)
|
if (keys.stride(0) == 1 && values.stride(0) == 1)
|
||||||
sort_by_key_onedpl(exec, keys, values);
|
sort_by_key_onedpl(exec, keys, values);
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
sort_by_key_via_sort(exec, keys, values);
|
sort_by_key_via_sort(exec, keys, values);
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
sort_by_key_via_sort(exec, keys, values);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -394,12 +426,18 @@ void sort_by_key_device_view_with_comparator(
|
|||||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||||
const ComparatorType& comparator) {
|
const ComparatorType& comparator) {
|
||||||
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
|
||||||
|
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||||
|
sort_by_key_onedpl(exec, keys, values, comparator);
|
||||||
|
#else
|
||||||
if (keys.stride(0) == 1 && values.stride(0) == 1)
|
if (keys.stride(0) == 1 && values.stride(0) == 1)
|
||||||
sort_by_key_onedpl(exec, keys, values, comparator);
|
sort_by_key_onedpl(exec, keys, values, comparator);
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
sort_by_key_via_sort(exec, keys, values, comparator);
|
sort_by_key_via_sort(exec, keys, values, comparator);
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
sort_by_key_via_sort(exec, keys, values, comparator);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -416,7 +454,9 @@ sort_by_key_device_view_with_comparator(
|
|||||||
sort_by_key_via_sort(exec, keys, values, comparator);
|
sort_by_key_via_sort(exec, keys, values, comparator);
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
#undef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY
|
||||||
|
|
||||||
} // namespace Kokkos::Impl
|
} // namespace Kokkos::Impl
|
||||||
|
#undef KOKKOS_IMPL_ONEDPL_VERSION
|
||||||
|
#undef KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -51,6 +51,7 @@
|
|||||||
#ifdef _CubLog
|
#ifdef _CubLog
|
||||||
#undef _CubLog
|
#undef _CubLog
|
||||||
#endif
|
#endif
|
||||||
|
// NOLINTNEXTLINE(bugprone-reserved-identifier)
|
||||||
#define _CubLog
|
#define _CubLog
|
||||||
#include <thrust/device_ptr.h>
|
#include <thrust/device_ptr.h>
|
||||||
#include <thrust/sort.h>
|
#include <thrust/sort.h>
|
||||||
@ -70,8 +71,20 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wshadow"
|
||||||
|
#pragma GCC diagnostic ignored "-Wunused-local-typedef"
|
||||||
|
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
||||||
|
#pragma GCC diagnostic ignored "-Wunused-variable"
|
||||||
#include <oneapi/dpl/execution>
|
#include <oneapi/dpl/execution>
|
||||||
#include <oneapi/dpl/algorithm>
|
#include <oneapi/dpl/algorithm>
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
|
#define KOKKOS_IMPL_ONEDPL_VERSION \
|
||||||
|
ONEDPL_VERSION_MAJOR * 10000 + ONEDPL_VERSION_MINOR * 100 + \
|
||||||
|
ONEDPL_VERSION_PATCH
|
||||||
|
#define KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(MAJOR, MINOR, PATCH) \
|
||||||
|
(KOKKOS_IMPL_ONEDPL_VERSION >= ((MAJOR)*10000 + (MINOR)*100 + (PATCH)))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace Kokkos {
|
namespace Kokkos {
|
||||||
@ -221,6 +234,10 @@ void sort_onedpl(const Kokkos::SYCL& space,
|
|||||||
"SYCL execution space is not able to access the memory space "
|
"SYCL execution space is not able to access the memory space "
|
||||||
"of the View argument!");
|
"of the View argument!");
|
||||||
|
|
||||||
|
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||||
|
static_assert(ViewType::rank == 1,
|
||||||
|
"Kokkos::sort currently only supports rank-1 Views.");
|
||||||
|
#else
|
||||||
static_assert(
|
static_assert(
|
||||||
(ViewType::rank == 1) &&
|
(ViewType::rank == 1) &&
|
||||||
(std::is_same_v<typename ViewType::array_layout, LayoutRight> ||
|
(std::is_same_v<typename ViewType::array_layout, LayoutRight> ||
|
||||||
@ -234,18 +251,26 @@ void sort_onedpl(const Kokkos::SYCL& space,
|
|||||||
if (view.stride(0) != 1) {
|
if (view.stride(0) != 1) {
|
||||||
Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1.");
|
Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1.");
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (view.extent(0) <= 1) {
|
if (view.extent(0) <= 1) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Can't use Experimental::begin/end here since the oneDPL then assumes that
|
|
||||||
// the data is on the host.
|
|
||||||
auto queue = space.sycl_queue();
|
auto queue = space.sycl_queue();
|
||||||
auto policy = oneapi::dpl::execution::make_device_policy(queue);
|
auto policy = oneapi::dpl::execution::make_device_policy(queue);
|
||||||
|
|
||||||
|
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||||
|
oneapi::dpl::sort(policy, ::Kokkos::Experimental::begin(view),
|
||||||
|
::Kokkos::Experimental::end(view),
|
||||||
|
std::forward<MaybeComparator>(maybeComparator)...);
|
||||||
|
#else
|
||||||
|
// Can't use Experimental::begin/end here since the oneDPL then assumes that
|
||||||
|
// the data is on the host.
|
||||||
const int n = view.extent(0);
|
const int n = view.extent(0);
|
||||||
oneapi::dpl::sort(policy, view.data(), view.data() + n,
|
oneapi::dpl::sort(policy, view.data(), view.data() + n,
|
||||||
std::forward<MaybeComparator>(maybeComparator)...);
|
std::forward<MaybeComparator>(maybeComparator)...);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -270,28 +295,18 @@ void copy_to_host_run_stdsort_copy_back(
|
|||||||
|
|
||||||
// run sort on the mirror of view_dc
|
// run sort on the mirror of view_dc
|
||||||
auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc);
|
auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc);
|
||||||
if (view.span_is_contiguous()) {
|
|
||||||
std::sort(mv_h.data(), mv_h.data() + mv_h.size(),
|
|
||||||
std::forward<MaybeComparator>(maybeComparator)...);
|
|
||||||
} else {
|
|
||||||
auto first = KE::begin(mv_h);
|
auto first = KE::begin(mv_h);
|
||||||
auto last = KE::end(mv_h);
|
auto last = KE::end(mv_h);
|
||||||
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
|
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
|
||||||
}
|
|
||||||
Kokkos::deep_copy(exec, view_dc, mv_h);
|
Kokkos::deep_copy(exec, view_dc, mv_h);
|
||||||
|
|
||||||
// copy back to argument view
|
// copy back to argument view
|
||||||
KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view));
|
KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view));
|
||||||
} else {
|
} else {
|
||||||
auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view);
|
auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view);
|
||||||
if (view.span_is_contiguous()) {
|
|
||||||
std::sort(view_h.data(), view_h.data() + view_h.size(),
|
|
||||||
std::forward<MaybeComparator>(maybeComparator)...);
|
|
||||||
} else {
|
|
||||||
auto first = KE::begin(view_h);
|
auto first = KE::begin(view_h);
|
||||||
auto last = KE::end(view_h);
|
auto last = KE::end(view_h);
|
||||||
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
|
std::sort(first, last, std::forward<MaybeComparator>(maybeComparator)...);
|
||||||
}
|
|
||||||
Kokkos::deep_copy(exec, view, view_h);
|
Kokkos::deep_copy(exec, view, view_h);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -332,11 +347,15 @@ void sort_device_view_without_comparator(
|
|||||||
"sort_device_view_without_comparator: supports rank-1 Views "
|
"sort_device_view_without_comparator: supports rank-1 Views "
|
||||||
"with LayoutLeft, LayoutRight or LayoutStride");
|
"with LayoutLeft, LayoutRight or LayoutStride");
|
||||||
|
|
||||||
|
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||||
|
sort_onedpl(exec, view);
|
||||||
|
#else
|
||||||
if (view.stride(0) == 1) {
|
if (view.stride(0) == 1) {
|
||||||
sort_onedpl(exec, view);
|
sort_onedpl(exec, view);
|
||||||
} else {
|
} else {
|
||||||
copy_to_host_run_stdsort_copy_back(exec, view);
|
copy_to_host_run_stdsort_copy_back(exec, view);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -387,11 +406,15 @@ void sort_device_view_with_comparator(
|
|||||||
"sort_device_view_with_comparator: supports rank-1 Views "
|
"sort_device_view_with_comparator: supports rank-1 Views "
|
||||||
"with LayoutLeft, LayoutRight or LayoutStride");
|
"with LayoutLeft, LayoutRight or LayoutStride");
|
||||||
|
|
||||||
|
#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1)
|
||||||
|
sort_onedpl(exec, view, comparator);
|
||||||
|
#else
|
||||||
if (view.stride(0) == 1) {
|
if (view.stride(0) == 1) {
|
||||||
sort_onedpl(exec, view, comparator);
|
sort_onedpl(exec, view, comparator);
|
||||||
} else {
|
} else {
|
||||||
copy_to_host_run_stdsort_copy_back(exec, view, comparator);
|
copy_to_host_run_stdsort_copy_back(exec, view, comparator);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -423,4 +446,7 @@ sort_device_view_with_comparator(
|
|||||||
|
|
||||||
} // namespace Impl
|
} // namespace Impl
|
||||||
} // namespace Kokkos
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#undef KOKKOS_IMPL_ONEDPL_VERSION
|
||||||
|
#undef KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -238,12 +238,9 @@ KOKKOS_INLINE_FUNCTION void expect_no_overlap(
|
|||||||
[[maybe_unused]] IteratorType2 s_first) {
|
[[maybe_unused]] IteratorType2 s_first) {
|
||||||
if constexpr (is_kokkos_iterator_v<IteratorType1> &&
|
if constexpr (is_kokkos_iterator_v<IteratorType1> &&
|
||||||
is_kokkos_iterator_v<IteratorType2>) {
|
is_kokkos_iterator_v<IteratorType2>) {
|
||||||
auto const view1 = first.view();
|
std::size_t stride1 = first.stride();
|
||||||
auto const view2 = s_first.view();
|
std::size_t stride2 = s_first.stride();
|
||||||
|
ptrdiff_t first_diff = first.data() - s_first.data();
|
||||||
std::size_t stride1 = view1.stride(0);
|
|
||||||
std::size_t stride2 = view2.stride(0);
|
|
||||||
ptrdiff_t first_diff = view1.data() - view2.data();
|
|
||||||
|
|
||||||
// FIXME If strides are not identical, checks may not be made
|
// FIXME If strides are not identical, checks may not be made
|
||||||
// with the cost of O(1)
|
// with the cost of O(1)
|
||||||
@ -251,8 +248,8 @@ KOKKOS_INLINE_FUNCTION void expect_no_overlap(
|
|||||||
// If first_diff == 0, there is already an overlap
|
// If first_diff == 0, there is already an overlap
|
||||||
if (stride1 == stride2 || first_diff == 0) {
|
if (stride1 == stride2 || first_diff == 0) {
|
||||||
[[maybe_unused]] bool is_no_overlap = (first_diff % stride1);
|
[[maybe_unused]] bool is_no_overlap = (first_diff % stride1);
|
||||||
auto* first_pointer1 = view1.data();
|
auto* first_pointer1 = first.data();
|
||||||
auto* first_pointer2 = view2.data();
|
auto* first_pointer2 = s_first.data();
|
||||||
[[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first);
|
[[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first);
|
||||||
[[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first);
|
[[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first);
|
||||||
KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 ||
|
KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 ||
|
||||||
|
|||||||
@ -150,9 +150,8 @@ KOKKOS_FUNCTION OutputIterator copy_if_team_impl(
|
|||||||
return d_first + count;
|
return d_first + count;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined KOKKOS_COMPILER_INTEL || \
|
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
!defined(KOKKOS_COMPILER_MSVC)
|
||||||
!defined(KOKKOS_COMPILER_MSVC))
|
|
||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@ -103,7 +103,7 @@ OutputIteratorType exclusive_scan_custom_op_exespace_impl(
|
|||||||
|
|
||||||
// aliases
|
// aliases
|
||||||
using index_type = typename InputIteratorType::difference_type;
|
using index_type = typename InputIteratorType::difference_type;
|
||||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
|
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
|
||||||
using func_type = TransformExclusiveScanFunctorWithValueWrapper<
|
using func_type = TransformExclusiveScanFunctorWithValueWrapper<
|
||||||
ExecutionSpace, index_type, ValueType, InputIteratorType,
|
ExecutionSpace, index_type, ValueType, InputIteratorType,
|
||||||
OutputIteratorType, BinaryOpType, unary_op_type>;
|
OutputIteratorType, BinaryOpType, unary_op_type>;
|
||||||
@ -177,7 +177,7 @@ KOKKOS_FUNCTION OutputIteratorType exclusive_scan_custom_op_team_impl(
|
|||||||
|
|
||||||
// aliases
|
// aliases
|
||||||
using exe_space = typename TeamHandleType::execution_space;
|
using exe_space = typename TeamHandleType::execution_space;
|
||||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
|
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
|
||||||
using index_type = typename InputIteratorType::difference_type;
|
using index_type = typename InputIteratorType::difference_type;
|
||||||
using func_type = TransformExclusiveScanFunctorWithoutValueWrapper<
|
using func_type = TransformExclusiveScanFunctorWithoutValueWrapper<
|
||||||
exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType,
|
exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType,
|
||||||
|
|||||||
@ -23,10 +23,11 @@ namespace Kokkos {
|
|||||||
namespace Experimental {
|
namespace Experimental {
|
||||||
namespace Impl {
|
namespace Impl {
|
||||||
|
|
||||||
template <class ValueType>
|
|
||||||
struct StdNumericScanIdentityReferenceUnaryFunctor {
|
struct StdNumericScanIdentityReferenceUnaryFunctor {
|
||||||
KOKKOS_FUNCTION
|
template <class T>
|
||||||
constexpr const ValueType& operator()(const ValueType& a) const { return a; }
|
KOKKOS_FUNCTION constexpr T&& operator()(T&& t) const {
|
||||||
|
return static_cast<T&&>(t);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Impl
|
} // namespace Impl
|
||||||
|
|||||||
@ -18,12 +18,60 @@
|
|||||||
#define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP
|
#define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
#include <Kokkos_Core.hpp>
|
||||||
|
#include <Kokkos_Profiling_ScopedRegion.hpp>
|
||||||
#include "Kokkos_Constraints.hpp"
|
#include "Kokkos_Constraints.hpp"
|
||||||
#include "Kokkos_HelperPredicates.hpp"
|
#include "Kokkos_HelperPredicates.hpp"
|
||||||
#include <std_algorithms/Kokkos_TransformInclusiveScan.hpp>
|
#include <std_algorithms/Kokkos_TransformInclusiveScan.hpp>
|
||||||
#include <std_algorithms/Kokkos_Distance.hpp>
|
#include <std_algorithms/Kokkos_Distance.hpp>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#if defined(KOKKOS_ENABLE_CUDA)
|
||||||
|
|
||||||
|
// Workaround for `Instruction 'shfl' without '.sync' is not supported on
|
||||||
|
// .target sm_70 and higher from PTX ISA version 6.4`.
|
||||||
|
// Also see https://github.com/NVIDIA/cub/pull/170.
|
||||||
|
#if !defined(CUB_USE_COOPERATIVE_GROUPS)
|
||||||
|
#define CUB_USE_COOPERATIVE_GROUPS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wshadow"
|
||||||
|
#pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||||
|
|
||||||
|
#if defined(KOKKOS_COMPILER_CLANG)
|
||||||
|
// Some versions of Clang fail to compile Thrust, failing with errors like
|
||||||
|
// this:
|
||||||
|
// <snip>/thrust/system/cuda/detail/core/agent_launcher.h:557:11:
|
||||||
|
// error: use of undeclared identifier 'va_printf'
|
||||||
|
// The exact combination of versions for Clang and Thrust (or CUDA) for this
|
||||||
|
// failure was not investigated, however even very recent version combination
|
||||||
|
// (Clang 10.0.0 and Cuda 10.0) demonstrated failure.
|
||||||
|
//
|
||||||
|
// Defining _CubLog here locally allows us to avoid that code path, however
|
||||||
|
// disabling some debugging diagnostics
|
||||||
|
#pragma push_macro("_CubLog")
|
||||||
|
#ifdef _CubLog
|
||||||
|
#undef _CubLog
|
||||||
|
#endif
|
||||||
|
// NOLINTNEXTLINE(bugprone-reserved-identifier)
|
||||||
|
#define _CubLog
|
||||||
|
#include <thrust/distance.h>
|
||||||
|
#include <thrust/scan.h>
|
||||||
|
#pragma pop_macro("_CubLog")
|
||||||
|
#else
|
||||||
|
#include <thrust/distance.h>
|
||||||
|
#include <thrust/scan.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||||
|
#include <thrust/distance.h>
|
||||||
|
#include <thrust/scan.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Kokkos {
|
namespace Kokkos {
|
||||||
namespace Experimental {
|
namespace Experimental {
|
||||||
namespace Impl {
|
namespace Impl {
|
||||||
@ -101,9 +149,48 @@ struct InclusiveScanDefaultFunctor {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
//
|
// -------------------------------------------------------------
|
||||||
// exespace impl
|
// inclusive_scan_default_op_exespace_impl
|
||||||
//
|
// -------------------------------------------------------------
|
||||||
|
|
||||||
|
#if defined(KOKKOS_ENABLE_CUDA)
|
||||||
|
template <class InputIteratorType, class OutputIteratorType>
|
||||||
|
OutputIteratorType inclusive_scan_default_op_exespace_impl(
|
||||||
|
const std::string& label, const Cuda& ex, InputIteratorType first_from,
|
||||||
|
InputIteratorType last_from, OutputIteratorType first_dest) {
|
||||||
|
const auto thrust_ex = thrust::cuda::par.on(ex.cuda_stream());
|
||||||
|
|
||||||
|
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
|
||||||
|
|
||||||
|
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest);
|
||||||
|
|
||||||
|
Kokkos::Profiling::popRegion();
|
||||||
|
|
||||||
|
const auto num_elements = thrust::distance(first_from, last_from);
|
||||||
|
|
||||||
|
return first_dest + num_elements;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||||
|
template <class InputIteratorType, class OutputIteratorType>
|
||||||
|
OutputIteratorType inclusive_scan_default_op_exespace_impl(
|
||||||
|
const std::string& label, const HIP& ex, InputIteratorType first_from,
|
||||||
|
InputIteratorType last_from, OutputIteratorType first_dest) {
|
||||||
|
const auto thrust_ex = thrust::hip::par.on(ex.hip_stream());
|
||||||
|
|
||||||
|
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
|
||||||
|
|
||||||
|
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest);
|
||||||
|
|
||||||
|
Kokkos::Profiling::popRegion();
|
||||||
|
|
||||||
|
const auto num_elements = thrust::distance(first_from, last_from);
|
||||||
|
|
||||||
|
return first_dest + num_elements;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
template <class ExecutionSpace, class InputIteratorType,
|
template <class ExecutionSpace, class InputIteratorType,
|
||||||
class OutputIteratorType>
|
class OutputIteratorType>
|
||||||
OutputIteratorType inclusive_scan_default_op_exespace_impl(
|
OutputIteratorType inclusive_scan_default_op_exespace_impl(
|
||||||
@ -132,11 +219,16 @@ OutputIteratorType inclusive_scan_default_op_exespace_impl(
|
|||||||
// run
|
// run
|
||||||
const auto num_elements =
|
const auto num_elements =
|
||||||
Kokkos::Experimental::distance(first_from, last_from);
|
Kokkos::Experimental::distance(first_from, last_from);
|
||||||
|
|
||||||
|
Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan");
|
||||||
|
|
||||||
::Kokkos::parallel_scan(label,
|
::Kokkos::parallel_scan(label,
|
||||||
RangePolicy<ExecutionSpace>(ex, 0, num_elements),
|
RangePolicy<ExecutionSpace>(ex, 0, num_elements),
|
||||||
func_type(first_from, first_dest));
|
func_type(first_from, first_dest));
|
||||||
ex.fence("Kokkos::inclusive_scan_default_op: fence after operation");
|
ex.fence("Kokkos::inclusive_scan_default_op: fence after operation");
|
||||||
|
|
||||||
|
Kokkos::Profiling::popRegion();
|
||||||
|
|
||||||
// return
|
// return
|
||||||
return first_dest + num_elements;
|
return first_dest + num_elements;
|
||||||
}
|
}
|
||||||
@ -144,6 +236,49 @@ OutputIteratorType inclusive_scan_default_op_exespace_impl(
|
|||||||
// -------------------------------------------------------------
|
// -------------------------------------------------------------
|
||||||
// inclusive_scan_custom_binary_op_impl
|
// inclusive_scan_custom_binary_op_impl
|
||||||
// -------------------------------------------------------------
|
// -------------------------------------------------------------
|
||||||
|
|
||||||
|
#if defined(KOKKOS_ENABLE_CUDA)
|
||||||
|
template <class InputIteratorType, class OutputIteratorType, class BinaryOpType>
|
||||||
|
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
||||||
|
const std::string& label, const Cuda& ex, InputIteratorType first_from,
|
||||||
|
InputIteratorType last_from, OutputIteratorType first_dest,
|
||||||
|
BinaryOpType binary_op) {
|
||||||
|
const auto thrust_ex = thrust::cuda::par.on(ex.cuda_stream());
|
||||||
|
|
||||||
|
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
|
||||||
|
|
||||||
|
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest,
|
||||||
|
binary_op);
|
||||||
|
|
||||||
|
Kokkos::Profiling::popRegion();
|
||||||
|
|
||||||
|
const auto num_elements = thrust::distance(first_from, last_from);
|
||||||
|
|
||||||
|
return first_dest + num_elements;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||||
|
template <class InputIteratorType, class OutputIteratorType, class BinaryOpType>
|
||||||
|
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
||||||
|
const std::string& label, const HIP& ex, InputIteratorType first_from,
|
||||||
|
InputIteratorType last_from, OutputIteratorType first_dest,
|
||||||
|
BinaryOpType binary_op) {
|
||||||
|
const auto thrust_ex = thrust::hip::par.on(ex.hip_stream());
|
||||||
|
|
||||||
|
Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan");
|
||||||
|
|
||||||
|
thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest,
|
||||||
|
binary_op);
|
||||||
|
|
||||||
|
Kokkos::Profiling::popRegion();
|
||||||
|
|
||||||
|
const auto num_elements = thrust::distance(first_from, last_from);
|
||||||
|
|
||||||
|
return first_dest + num_elements;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
template <class ExecutionSpace, class InputIteratorType,
|
template <class ExecutionSpace, class InputIteratorType,
|
||||||
class OutputIteratorType, class BinaryOpType>
|
class OutputIteratorType, class BinaryOpType>
|
||||||
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
||||||
@ -160,7 +295,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
|||||||
using index_type = typename InputIteratorType::difference_type;
|
using index_type = typename InputIteratorType::difference_type;
|
||||||
using value_type =
|
using value_type =
|
||||||
std::remove_const_t<typename InputIteratorType::value_type>;
|
std::remove_const_t<typename InputIteratorType::value_type>;
|
||||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<value_type>;
|
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
|
||||||
using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor<
|
using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor<
|
||||||
ExecutionSpace, index_type, value_type, InputIteratorType,
|
ExecutionSpace, index_type, value_type, InputIteratorType,
|
||||||
OutputIteratorType, BinaryOpType, unary_op_type>;
|
OutputIteratorType, BinaryOpType, unary_op_type>;
|
||||||
@ -168,11 +303,16 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
|||||||
// run
|
// run
|
||||||
const auto num_elements =
|
const auto num_elements =
|
||||||
Kokkos::Experimental::distance(first_from, last_from);
|
Kokkos::Experimental::distance(first_from, last_from);
|
||||||
|
|
||||||
|
Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan");
|
||||||
|
|
||||||
::Kokkos::parallel_scan(
|
::Kokkos::parallel_scan(
|
||||||
label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
|
label, RangePolicy<ExecutionSpace>(ex, 0, num_elements),
|
||||||
func_type(first_from, first_dest, binary_op, unary_op_type()));
|
func_type(first_from, first_dest, binary_op, unary_op_type()));
|
||||||
ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation");
|
ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation");
|
||||||
|
|
||||||
|
Kokkos::Profiling::popRegion();
|
||||||
|
|
||||||
// return
|
// return
|
||||||
return first_dest + num_elements;
|
return first_dest + num_elements;
|
||||||
}
|
}
|
||||||
@ -195,7 +335,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
|||||||
|
|
||||||
// aliases
|
// aliases
|
||||||
using index_type = typename InputIteratorType::difference_type;
|
using index_type = typename InputIteratorType::difference_type;
|
||||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
|
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
|
||||||
using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor<
|
using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor<
|
||||||
ExecutionSpace, index_type, ValueType, InputIteratorType,
|
ExecutionSpace, index_type, ValueType, InputIteratorType,
|
||||||
OutputIteratorType, BinaryOpType, unary_op_type>;
|
OutputIteratorType, BinaryOpType, unary_op_type>;
|
||||||
@ -203,12 +343,17 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl(
|
|||||||
// run
|
// run
|
||||||
const auto num_elements =
|
const auto num_elements =
|
||||||
Kokkos::Experimental::distance(first_from, last_from);
|
Kokkos::Experimental::distance(first_from, last_from);
|
||||||
|
|
||||||
|
Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan");
|
||||||
|
|
||||||
::Kokkos::parallel_scan(label,
|
::Kokkos::parallel_scan(label,
|
||||||
RangePolicy<ExecutionSpace>(ex, 0, num_elements),
|
RangePolicy<ExecutionSpace>(ex, 0, num_elements),
|
||||||
func_type(first_from, first_dest, binary_op,
|
func_type(first_from, first_dest, binary_op,
|
||||||
unary_op_type(), std::move(init_value)));
|
unary_op_type(), std::move(init_value)));
|
||||||
ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation");
|
ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation");
|
||||||
|
|
||||||
|
Kokkos::Profiling::popRegion();
|
||||||
|
|
||||||
// return
|
// return
|
||||||
return first_dest + num_elements;
|
return first_dest + num_elements;
|
||||||
}
|
}
|
||||||
@ -283,7 +428,7 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl(
|
|||||||
|
|
||||||
// aliases
|
// aliases
|
||||||
using exe_space = typename TeamHandleType::execution_space;
|
using exe_space = typename TeamHandleType::execution_space;
|
||||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<value_type>;
|
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
|
||||||
using func_type = TeamTransformInclusiveScanNoInitValueFunctor<
|
using func_type = TeamTransformInclusiveScanNoInitValueFunctor<
|
||||||
exe_space, value_type, InputIteratorType, OutputIteratorType,
|
exe_space, value_type, InputIteratorType, OutputIteratorType,
|
||||||
BinaryOpType, unary_op_type>;
|
BinaryOpType, unary_op_type>;
|
||||||
@ -291,7 +436,6 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl(
|
|||||||
// run
|
// run
|
||||||
const auto num_elements =
|
const auto num_elements =
|
||||||
Kokkos::Experimental::distance(first_from, last_from);
|
Kokkos::Experimental::distance(first_from, last_from);
|
||||||
|
|
||||||
::Kokkos::parallel_scan(
|
::Kokkos::parallel_scan(
|
||||||
TeamThreadRange(teamHandle, 0, num_elements),
|
TeamThreadRange(teamHandle, 0, num_elements),
|
||||||
func_type(first_from, first_dest, binary_op, unary_op_type()));
|
func_type(first_from, first_dest, binary_op, unary_op_type()));
|
||||||
@ -325,7 +469,7 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl(
|
|||||||
|
|
||||||
// aliases
|
// aliases
|
||||||
using exe_space = typename TeamHandleType::execution_space;
|
using exe_space = typename TeamHandleType::execution_space;
|
||||||
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor<ValueType>;
|
using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor;
|
||||||
using func_type = TeamTransformInclusiveScanWithInitValueFunctor<
|
using func_type = TeamTransformInclusiveScanWithInitValueFunctor<
|
||||||
exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType,
|
exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType,
|
||||||
unary_op_type>;
|
unary_op_type>;
|
||||||
|
|||||||
@ -18,6 +18,7 @@
|
|||||||
#define KOKKOS_RANDOM_ACCESS_ITERATOR_IMPL_HPP
|
#define KOKKOS_RANDOM_ACCESS_ITERATOR_IMPL_HPP
|
||||||
|
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
#include <utility> // declval
|
||||||
#include <Kokkos_Macros.hpp>
|
#include <Kokkos_Macros.hpp>
|
||||||
#include <Kokkos_View.hpp>
|
#include <Kokkos_View.hpp>
|
||||||
#include "Kokkos_Constraints.hpp"
|
#include "Kokkos_Constraints.hpp"
|
||||||
@ -29,8 +30,29 @@ namespace Impl {
|
|||||||
template <class T>
|
template <class T>
|
||||||
class RandomAccessIterator;
|
class RandomAccessIterator;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
template <typename ViewType>
|
||||||
|
struct is_always_strided {
|
||||||
|
static_assert(is_view_v<ViewType>);
|
||||||
|
|
||||||
|
constexpr static bool value =
|
||||||
|
#ifdef KOKKOS_ENABLE_IMPL_MDSPAN
|
||||||
|
decltype(std::declval<ViewType>().to_mdspan())::is_always_strided();
|
||||||
|
#else
|
||||||
|
(std::is_same_v<typename ViewType::traits::array_layout,
|
||||||
|
Kokkos::LayoutLeft> ||
|
||||||
|
std::is_same_v<typename ViewType::traits::array_layout,
|
||||||
|
Kokkos::LayoutRight> ||
|
||||||
|
std::is_same_v<typename ViewType::traits::array_layout,
|
||||||
|
Kokkos::LayoutStride>);
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
template <class DataType, class... Args>
|
template <class DataType, class... Args>
|
||||||
class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
class RandomAccessIterator<::Kokkos::View<DataType, Args...>> {
|
||||||
public:
|
public:
|
||||||
using view_type = ::Kokkos::View<DataType, Args...>;
|
using view_type = ::Kokkos::View<DataType, Args...>;
|
||||||
using iterator_type = RandomAccessIterator<view_type>;
|
using iterator_type = RandomAccessIterator<view_type>;
|
||||||
@ -41,30 +63,31 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
|||||||
using pointer = typename view_type::pointer_type;
|
using pointer = typename view_type::pointer_type;
|
||||||
using reference = typename view_type::reference_type;
|
using reference = typename view_type::reference_type;
|
||||||
|
|
||||||
|
// oneDPL needs this alias in order not to assume the data is on the host but on
|
||||||
|
// the device, see
|
||||||
|
// https://github.com/uxlfoundation/oneDPL/blob/a045eac689f9107f50ba7b42235e9e927118e483/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h#L210-L214
|
||||||
|
#ifdef KOKKOS_ENABLE_ONEDPL
|
||||||
|
using is_passed_directly = std::true_type;
|
||||||
|
#endif
|
||||||
|
|
||||||
static_assert(view_type::rank == 1 &&
|
static_assert(view_type::rank == 1 &&
|
||||||
(std::is_same_v<typename view_type::traits::array_layout,
|
is_always_strided<::Kokkos::View<DataType, Args...>>::value);
|
||||||
Kokkos::LayoutLeft> ||
|
|
||||||
std::is_same_v<typename view_type::traits::array_layout,
|
|
||||||
Kokkos::LayoutRight> ||
|
|
||||||
std::is_same_v<typename view_type::traits::array_layout,
|
|
||||||
Kokkos::LayoutStride>),
|
|
||||||
"RandomAccessIterator only supports 1D Views with LayoutLeft, "
|
|
||||||
"LayoutRight, LayoutStride.");
|
|
||||||
|
|
||||||
KOKKOS_DEFAULTED_FUNCTION RandomAccessIterator() = default;
|
KOKKOS_DEFAULTED_FUNCTION RandomAccessIterator() = default;
|
||||||
|
|
||||||
explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view)
|
explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view)
|
||||||
: m_view(view) {}
|
: m_data(view.data()), m_stride(view.stride_0()) {}
|
||||||
explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view,
|
explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view,
|
||||||
ptrdiff_t current_index)
|
ptrdiff_t current_index)
|
||||||
: m_view(view), m_current_index(current_index) {}
|
: m_data(view.data() + current_index * view.stride_0()),
|
||||||
|
m_stride(view.stride_0()) {}
|
||||||
|
|
||||||
#ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond
|
#ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond
|
||||||
template <class OtherViewType>
|
template <class OtherViewType>
|
||||||
requires(std::is_constructible_v<view_type, OtherViewType>)
|
requires(std::is_constructible_v<view_type, OtherViewType>)
|
||||||
KOKKOS_FUNCTION explicit(!std::is_convertible_v<OtherViewType, view_type>)
|
KOKKOS_FUNCTION explicit(!std::is_convertible_v<OtherViewType, view_type>)
|
||||||
RandomAccessIterator(const RandomAccessIterator<OtherViewType>& other)
|
RandomAccessIterator(const RandomAccessIterator<OtherViewType>& other)
|
||||||
: m_view(other.m_view), m_current_index(other.m_current_index) {}
|
: m_data(other.m_data), m_stride(other.m_stride) {}
|
||||||
#else
|
#else
|
||||||
template <
|
template <
|
||||||
class OtherViewType,
|
class OtherViewType,
|
||||||
@ -73,19 +96,22 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
|||||||
int> = 0>
|
int> = 0>
|
||||||
KOKKOS_FUNCTION explicit RandomAccessIterator(
|
KOKKOS_FUNCTION explicit RandomAccessIterator(
|
||||||
const RandomAccessIterator<OtherViewType>& other)
|
const RandomAccessIterator<OtherViewType>& other)
|
||||||
: m_view(other.m_view), m_current_index(other.m_current_index) {}
|
: m_data(other.m_data), m_stride(other.m_stride) {}
|
||||||
|
|
||||||
template <class OtherViewType,
|
template <class OtherViewType,
|
||||||
std::enable_if_t<std::is_convertible_v<OtherViewType, view_type>,
|
std::enable_if_t<std::is_convertible_v<OtherViewType, view_type>,
|
||||||
int> = 0>
|
int> = 0>
|
||||||
KOKKOS_FUNCTION RandomAccessIterator(
|
KOKKOS_FUNCTION RandomAccessIterator(
|
||||||
const RandomAccessIterator<OtherViewType>& other)
|
const RandomAccessIterator<OtherViewType>& other)
|
||||||
: m_view(other.m_view), m_current_index(other.m_current_index) {}
|
: m_data(other.m_data), m_stride(other.m_stride) {}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
iterator_type& operator++() {
|
iterator_type& operator++() {
|
||||||
++m_current_index;
|
if constexpr (is_always_contiguous)
|
||||||
|
m_data++;
|
||||||
|
else
|
||||||
|
m_data += m_stride;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -98,7 +124,10 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
|||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
iterator_type& operator--() {
|
iterator_type& operator--() {
|
||||||
--m_current_index;
|
if constexpr (is_always_contiguous)
|
||||||
|
m_data--;
|
||||||
|
else
|
||||||
|
m_data -= m_stride;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -111,77 +140,95 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
|||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
reference operator[](difference_type n) const {
|
reference operator[](difference_type n) const {
|
||||||
return m_view(m_current_index + n);
|
if constexpr (is_always_contiguous)
|
||||||
|
return *(m_data + n);
|
||||||
|
else
|
||||||
|
return *(m_data + n * m_stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
iterator_type& operator+=(difference_type n) {
|
iterator_type& operator+=(difference_type n) {
|
||||||
m_current_index += n;
|
if constexpr (is_always_contiguous)
|
||||||
|
m_data += n;
|
||||||
|
else
|
||||||
|
m_data += n * m_stride;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
iterator_type& operator-=(difference_type n) {
|
iterator_type& operator-=(difference_type n) {
|
||||||
m_current_index -= n;
|
if constexpr (is_always_contiguous)
|
||||||
|
m_data -= n;
|
||||||
|
else
|
||||||
|
m_data -= n * m_stride;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
iterator_type operator+(difference_type n) const {
|
iterator_type operator+(difference_type n) const {
|
||||||
return iterator_type(m_view, m_current_index + n);
|
auto it = *this;
|
||||||
|
it += n;
|
||||||
|
return it;
|
||||||
|
}
|
||||||
|
|
||||||
|
friend iterator_type operator+(difference_type n, iterator_type other) {
|
||||||
|
return other + n;
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
iterator_type operator-(difference_type n) const {
|
iterator_type operator-(difference_type n) const {
|
||||||
return iterator_type(m_view, m_current_index - n);
|
auto it = *this;
|
||||||
|
it -= n;
|
||||||
|
return it;
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
difference_type operator-(iterator_type it) const {
|
difference_type operator-(iterator_type it) const {
|
||||||
return m_current_index - it.m_current_index;
|
if constexpr (is_always_contiguous)
|
||||||
|
return m_data - it.m_data;
|
||||||
|
else
|
||||||
|
return (m_data - it.m_data) / m_stride;
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
bool operator==(iterator_type other) const {
|
bool operator==(iterator_type other) const {
|
||||||
return m_current_index == other.m_current_index &&
|
return m_data == other.m_data && m_stride == other.m_stride;
|
||||||
m_view.data() == other.m_view.data();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
bool operator!=(iterator_type other) const {
|
bool operator!=(iterator_type other) const {
|
||||||
return m_current_index != other.m_current_index ||
|
return m_data != other.m_data || m_stride != other.m_stride;
|
||||||
m_view.data() != other.m_view.data();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
bool operator<(iterator_type other) const {
|
bool operator<(iterator_type other) const { return m_data < other.m_data; }
|
||||||
return m_current_index < other.m_current_index;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
bool operator<=(iterator_type other) const {
|
bool operator<=(iterator_type other) const { return m_data <= other.m_data; }
|
||||||
return m_current_index <= other.m_current_index;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
bool operator>(iterator_type other) const {
|
bool operator>(iterator_type other) const { return m_data > other.m_data; }
|
||||||
return m_current_index > other.m_current_index;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
bool operator>=(iterator_type other) const {
|
bool operator>=(iterator_type other) const { return m_data >= other.m_data; }
|
||||||
return m_current_index >= other.m_current_index;
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
reference operator*() const { return m_view(m_current_index); }
|
reference operator*() const { return *m_data; }
|
||||||
|
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
view_type view() const { return m_view; }
|
pointer data() const { return m_data; }
|
||||||
|
|
||||||
|
KOKKOS_FUNCTION
|
||||||
|
int stride() const { return m_stride; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
view_type m_view;
|
pointer m_data;
|
||||||
ptrdiff_t m_current_index = 0;
|
int m_stride;
|
||||||
|
static constexpr bool is_always_contiguous =
|
||||||
|
(std::is_same_v<typename view_type::traits::array_layout,
|
||||||
|
Kokkos::LayoutLeft> ||
|
||||||
|
std::is_same_v<typename view_type::traits::array_layout,
|
||||||
|
Kokkos::LayoutRight>);
|
||||||
|
|
||||||
// Needed for the converting constructor accepting another iterator
|
// Needed for the converting constructor accepting another iterator
|
||||||
template <class>
|
template <class>
|
||||||
@ -192,4 +239,10 @@ class RandomAccessIterator< ::Kokkos::View<DataType, Args...> > {
|
|||||||
} // namespace Experimental
|
} // namespace Experimental
|
||||||
} // namespace Kokkos
|
} // namespace Kokkos
|
||||||
|
|
||||||
|
#ifdef KOKKOS_ENABLE_SYCL
|
||||||
|
template <class T>
|
||||||
|
struct sycl::is_device_copyable<
|
||||||
|
Kokkos::Experimental::Impl::RandomAccessIterator<T>> : std::true_type {};
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -52,13 +52,10 @@ struct StdUniqueFunctor {
|
|||||||
auto& val_i = m_first_from[i];
|
auto& val_i = m_first_from[i];
|
||||||
const auto& val_ip1 = m_first_from[i + 1];
|
const auto& val_ip1 = m_first_from[i + 1];
|
||||||
|
|
||||||
if (final_pass) {
|
|
||||||
if (!m_pred(val_i, val_ip1)) {
|
if (!m_pred(val_i, val_ip1)) {
|
||||||
|
if (final_pass) {
|
||||||
m_first_dest[update] = std::move(val_i);
|
m_first_dest[update] = std::move(val_i);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (!m_pred(val_i, val_ip1)) {
|
|
||||||
update += 1;
|
update += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -188,6 +185,7 @@ KOKKOS_FUNCTION IteratorType unique_team_impl(const TeamHandleType& teamHandle,
|
|||||||
IteratorType result = first;
|
IteratorType result = first;
|
||||||
IteratorType lfirst = first;
|
IteratorType lfirst = first;
|
||||||
while (++lfirst != last) {
|
while (++lfirst != last) {
|
||||||
|
// NOLINTNEXTLINE(bugprone-inc-dec-in-conditions)
|
||||||
if (!pred(*result, *lfirst) && ++result != lfirst) {
|
if (!pred(*result, *lfirst) && ++result != lfirst) {
|
||||||
*result = std::move(*lfirst);
|
*result = std::move(*lfirst);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -175,9 +175,8 @@ KOKKOS_FUNCTION OutputIterator unique_copy_team_impl(
|
|||||||
d_first + count);
|
d_first + count);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined KOKKOS_COMPILER_INTEL || \
|
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
!defined(KOKKOS_COMPILER_MSVC)
|
||||||
!defined(KOKKOS_COMPILER_MSVC))
|
|
||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@ -18,6 +18,8 @@ LINK ?= $(CXX)
|
|||||||
LDFLAGS ?=
|
LDFLAGS ?=
|
||||||
override LDFLAGS += -lpthread
|
override LDFLAGS += -lpthread
|
||||||
|
|
||||||
|
KOKKOS_USE_DEPRECATED_MAKEFILES=1
|
||||||
|
|
||||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||||
|
|
||||||
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests -I${KOKKOS_PATH}/core/unit_test/category_files
|
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests -I${KOKKOS_PATH}/core/unit_test/category_files
|
||||||
|
|||||||
@ -281,7 +281,7 @@ struct test_random_scalar {
|
|||||||
double covariance_eps =
|
double covariance_eps =
|
||||||
result.covariance / num_draws / 2 / variance_expect;
|
result.covariance / num_draws / 2 / variance_expect;
|
||||||
#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
|
#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
|
||||||
if (!std::is_same<Scalar, Kokkos::Experimental::bhalf_t>::value) {
|
if (!std::is_same_v<Scalar, Kokkos::Experimental::bhalf_t>) {
|
||||||
#endif
|
#endif
|
||||||
EXPECT_LT(std::abs(mean_eps), tolerance);
|
EXPECT_LT(std::abs(mean_eps), tolerance);
|
||||||
EXPECT_LT(std::abs(variance_eps), 1.5 * tolerance);
|
EXPECT_LT(std::abs(variance_eps), 1.5 * tolerance);
|
||||||
@ -312,7 +312,7 @@ struct test_random_scalar {
|
|||||||
(result.covariance / HIST_DIM1D - covariance_expect) / mean_expect;
|
(result.covariance / HIST_DIM1D - covariance_expect) / mean_expect;
|
||||||
|
|
||||||
#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
|
#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
|
||||||
if (std::is_same<Scalar, Kokkos::Experimental::half_t>::value) {
|
if (std::is_same_v<Scalar, Kokkos::Experimental::half_t>) {
|
||||||
mean_eps_expect = 0.0003;
|
mean_eps_expect = 0.0003;
|
||||||
variance_eps_expect = 1.0;
|
variance_eps_expect = 1.0;
|
||||||
covariance_eps_expect = 5.0e4;
|
covariance_eps_expect = 5.0e4;
|
||||||
@ -320,7 +320,7 @@ struct test_random_scalar {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
|
#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
|
||||||
if (!std::is_same<Scalar, Kokkos::Experimental::bhalf_t>::value) {
|
if (!std::is_same_v<Scalar, Kokkos::Experimental::bhalf_t>) {
|
||||||
#endif
|
#endif
|
||||||
EXPECT_LT(std::abs(mean_eps), mean_eps_expect);
|
EXPECT_LT(std::abs(mean_eps), mean_eps_expect);
|
||||||
EXPECT_LT(std::abs(variance_eps), variance_eps_expect);
|
EXPECT_LT(std::abs(variance_eps), variance_eps_expect);
|
||||||
@ -358,13 +358,13 @@ struct test_random_scalar {
|
|||||||
(result.covariance / HIST_DIM1D - covariance_expect) / mean_expect;
|
(result.covariance / HIST_DIM1D - covariance_expect) / mean_expect;
|
||||||
|
|
||||||
#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
|
#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
|
||||||
if (std::is_same<Scalar, Kokkos::Experimental::half_t>::value) {
|
if (std::is_same_v<Scalar, Kokkos::Experimental::half_t>) {
|
||||||
variance_factor = 7;
|
variance_factor = 7;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
|
#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
|
||||||
if (!std::is_same<Scalar, Kokkos::Experimental::bhalf_t>::value) {
|
if (!std::is_same_v<Scalar, Kokkos::Experimental::bhalf_t>) {
|
||||||
#endif
|
#endif
|
||||||
EXPECT_LT(std::abs(mean_eps), tolerance);
|
EXPECT_LT(std::abs(mean_eps), tolerance);
|
||||||
EXPECT_LT(std::abs(variance_eps), variance_factor);
|
EXPECT_LT(std::abs(variance_eps), variance_factor);
|
||||||
|
|||||||
@ -37,12 +37,18 @@ struct random_access_iterator_test : std_algorithms_test {
|
|||||||
|
|
||||||
TEST_F(random_access_iterator_test, constructor) {
|
TEST_F(random_access_iterator_test, constructor) {
|
||||||
// just tests that constructor works
|
// just tests that constructor works
|
||||||
auto it1 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view);
|
[[maybe_unused]] auto it1 =
|
||||||
auto it2 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view);
|
KE::Impl::RandomAccessIterator<static_view_t>(m_static_view);
|
||||||
auto it3 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view);
|
[[maybe_unused]] auto it2 =
|
||||||
auto it4 = KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3);
|
KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view);
|
||||||
auto it5 = KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 3);
|
[[maybe_unused]] auto it3 =
|
||||||
auto it6 = KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 3);
|
KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view);
|
||||||
|
[[maybe_unused]] auto it4 =
|
||||||
|
KE::Impl::RandomAccessIterator<static_view_t>(m_static_view, 3);
|
||||||
|
[[maybe_unused]] auto it5 =
|
||||||
|
KE::Impl::RandomAccessIterator<dyn_view_t>(m_dynamic_view, 3);
|
||||||
|
[[maybe_unused]] auto it6 =
|
||||||
|
KE::Impl::RandomAccessIterator<strided_view_t>(m_strided_view, 3);
|
||||||
EXPECT_TRUE(true);
|
EXPECT_TRUE(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -99,6 +99,7 @@ void test_dynamic_view_sort_impl(unsigned int n) {
|
|||||||
Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>;
|
Kokkos::Experimental::DynamicView<KeyType*, ExecutionSpace>;
|
||||||
using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;
|
using KeyViewType = Kokkos::View<KeyType*, ExecutionSpace>;
|
||||||
|
|
||||||
|
// NOLINTNEXTLINE(bugprone-implicit-widening-of-multiplication-result)
|
||||||
const size_t upper_bound = 2 * n;
|
const size_t upper_bound = 2 * n;
|
||||||
const size_t min_chunk_size = 1024;
|
const size_t min_chunk_size = 1024;
|
||||||
|
|
||||||
|
|||||||
@ -198,9 +198,8 @@ auto create_deep_copyable_compatible_view_with_same_extent(ViewType view) {
|
|||||||
|
|
||||||
// this is needed for intel to avoid
|
// this is needed for intel to avoid
|
||||||
// error #1011: missing return statement at end of non-void function
|
// error #1011: missing return statement at end of non-void function
|
||||||
#if defined KOKKOS_COMPILER_INTEL || \
|
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
!defined(KOKKOS_COMPILER_MSVC)
|
||||||
!defined(KOKKOS_COMPILER_MSVC))
|
|
||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@ -507,6 +507,20 @@ struct TestStruct {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifndef KOKKOS_ENABLE_CXX17
|
||||||
|
template <typename ViewType>
|
||||||
|
constexpr bool
|
||||||
|
test_kokkos_iterator_satify_std_random_access_iterator_concept() {
|
||||||
|
return std::random_access_iterator<
|
||||||
|
Kokkos::Experimental::Impl::RandomAccessIterator<ViewType>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
static_assert(test_kokkos_iterator_satify_std_random_access_iterator_concept<
|
||||||
|
Kokkos::View<int *>>());
|
||||||
|
static_assert(test_kokkos_iterator_satify_std_random_access_iterator_concept<
|
||||||
|
Kokkos::View<const int *>>());
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace compileonly
|
} // namespace compileonly
|
||||||
} // namespace stdalgos
|
} // namespace stdalgos
|
||||||
} // namespace Test
|
} // namespace Test
|
||||||
|
|||||||
@ -173,6 +173,7 @@ TEST(std_algorithms_DeathTest, expect_no_overlap) {
|
|||||||
|
|
||||||
KE::Impl::expect_no_overlap(sub_first_d0, sub_last_d0, sub_first_d1);
|
KE::Impl::expect_no_overlap(sub_first_d0, sub_last_d0, sub_first_d1);
|
||||||
|
|
||||||
|
// NOLINTNEXTLINE(bugprone-implicit-widening-of-multiplication-result)
|
||||||
Kokkos::LayoutStride layout2d{2, 3, extent0, 2 * 3};
|
Kokkos::LayoutStride layout2d{2, 3, extent0, 2 * 3};
|
||||||
Kokkos::View<value_type**, Kokkos::LayoutStride> strided_view_2d{
|
Kokkos::View<value_type**, Kokkos::LayoutStride> strided_view_2d{
|
||||||
"std-algo-test-2d-contiguous-view-strided", layout2d};
|
"std-algo-test-2d-contiguous-view-strided", layout2d};
|
||||||
|
|||||||
@ -171,7 +171,7 @@ struct VerifyData {
|
|||||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||||
if (test_view_h.extent(0) > 0) {
|
if (test_view_h.extent(0) > 0) {
|
||||||
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
|
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
|
||||||
if (std::is_same<gold_view_value_type, int>::value) {
|
if (std::is_same_v<gold_view_value_type, int>) {
|
||||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||||
} else {
|
} else {
|
||||||
const auto error =
|
const auto error =
|
||||||
|
|||||||
@ -184,7 +184,7 @@ struct VerifyData {
|
|||||||
const auto ext = test_view_h.extent(0);
|
const auto ext = test_view_h.extent(0);
|
||||||
if (ext > 0) {
|
if (ext > 0) {
|
||||||
for (std::size_t i = 0; i < ext; ++i) {
|
for (std::size_t i = 0; i < ext; ++i) {
|
||||||
if (std::is_same<gold_view_value_type, int>::value) {
|
if (std::is_same_v<gold_view_value_type, int>) {
|
||||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||||
} else {
|
} else {
|
||||||
const auto error =
|
const auto error =
|
||||||
|
|||||||
@ -153,12 +153,13 @@ void run_single_scenario(const InfoType& scenario_info) {
|
|||||||
|
|
||||||
#if !defined KOKKOS_ENABLE_OPENMPTARGET
|
#if !defined KOKKOS_ENABLE_OPENMPTARGET
|
||||||
CustomLessThanComparator<ValueType, ValueType> comp;
|
CustomLessThanComparator<ValueType, ValueType> comp;
|
||||||
auto r5 =
|
[[maybe_unused]] auto r5 =
|
||||||
KE::is_sorted_until(exespace(), KE::cbegin(view), KE::cend(view), comp);
|
KE::is_sorted_until(exespace(), KE::cbegin(view), KE::cend(view), comp);
|
||||||
auto r6 = KE::is_sorted_until("label", exespace(), KE::cbegin(view),
|
[[maybe_unused]] auto r6 = KE::is_sorted_until(
|
||||||
KE::cend(view), comp);
|
"label", exespace(), KE::cbegin(view), KE::cend(view), comp);
|
||||||
auto r7 = KE::is_sorted_until(exespace(), view, comp);
|
[[maybe_unused]] auto r7 = KE::is_sorted_until(exespace(), view, comp);
|
||||||
auto r8 = KE::is_sorted_until("label", exespace(), view, comp);
|
[[maybe_unused]] auto r8 =
|
||||||
|
KE::is_sorted_until("label", exespace(), view, comp);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{});
|
ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||||
|
|||||||
@ -53,13 +53,13 @@ TEST(std_algorithms_mod_ops_test, move) {
|
|||||||
// move constr
|
// move constr
|
||||||
MyMovableType b(std::move(a));
|
MyMovableType b(std::move(a));
|
||||||
ASSERT_EQ(b.m_value, 11);
|
ASSERT_EQ(b.m_value, 11);
|
||||||
ASSERT_EQ(a.m_value, -2);
|
ASSERT_EQ(a.m_value, -2); // NOLINT(bugprone-use-after-move)
|
||||||
|
|
||||||
// move assign
|
// move assign
|
||||||
MyMovableType c;
|
MyMovableType c;
|
||||||
c = std::move(b);
|
c = std::move(b);
|
||||||
ASSERT_EQ(c.m_value, 11);
|
ASSERT_EQ(c.m_value, 11);
|
||||||
ASSERT_EQ(b.m_value, -4);
|
ASSERT_EQ(b.m_value, -4); // NOLINT(bugprone-use-after-move)
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class ViewType>
|
template <class ViewType>
|
||||||
@ -70,7 +70,7 @@ struct StdAlgoModSeqOpsTestMove {
|
|||||||
void operator()(const int index) const {
|
void operator()(const int index) const {
|
||||||
typename ViewType::value_type a{11};
|
typename ViewType::value_type a{11};
|
||||||
using move_t = decltype(std::move(a));
|
using move_t = decltype(std::move(a));
|
||||||
static_assert(std::is_rvalue_reference<move_t>::value);
|
static_assert(std::is_rvalue_reference_v<move_t>);
|
||||||
m_view(index) = std::move(a);
|
m_view(index) = std::move(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -243,16 +243,15 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view,
|
|||||||
ViewType2 second_view,
|
ViewType2 second_view,
|
||||||
ValueType init_value,
|
ValueType init_value,
|
||||||
ValueType result_value,
|
ValueType result_value,
|
||||||
Args&&... args) {
|
Args const&... args) {
|
||||||
// trivial cases
|
// trivial cases
|
||||||
const auto r1 = KE::transform_reduce(
|
const auto r1 = KE::transform_reduce(
|
||||||
ExecutionSpace(), KE::cbegin(first_view), KE::cbegin(first_view),
|
ExecutionSpace(), KE::cbegin(first_view), KE::cbegin(first_view),
|
||||||
KE::cbegin(second_view), init_value, std::forward<Args>(args)...);
|
KE::cbegin(second_view), init_value, args...);
|
||||||
|
|
||||||
const auto r2 =
|
const auto r2 = KE::transform_reduce(
|
||||||
KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(first_view),
|
"MYLABEL", ExecutionSpace(), KE::cbegin(first_view),
|
||||||
KE::cbegin(first_view), KE::cbegin(second_view),
|
KE::cbegin(first_view), KE::cbegin(second_view), init_value, args...);
|
||||||
init_value, std::forward<Args>(args)...);
|
|
||||||
|
|
||||||
ASSERT_EQ(r1, init_value);
|
ASSERT_EQ(r1, init_value);
|
||||||
ASSERT_EQ(r2, init_value);
|
ASSERT_EQ(r2, init_value);
|
||||||
@ -260,18 +259,16 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view,
|
|||||||
// non trivial cases
|
// non trivial cases
|
||||||
const auto r3 = KE::transform_reduce(
|
const auto r3 = KE::transform_reduce(
|
||||||
ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view),
|
ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view),
|
||||||
KE::cbegin(second_view), init_value, std::forward<Args>(args)...);
|
KE::cbegin(second_view), init_value, args...);
|
||||||
|
|
||||||
const auto r4 = KE::transform_reduce(
|
const auto r4 = KE::transform_reduce(
|
||||||
"MYLABEL", ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view),
|
"MYLABEL", ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view),
|
||||||
KE::cbegin(second_view), init_value, std::forward<Args>(args)...);
|
KE::cbegin(second_view), init_value, args...);
|
||||||
|
|
||||||
const auto r5 =
|
const auto r5 = KE::transform_reduce(ExecutionSpace(), first_view,
|
||||||
KE::transform_reduce(ExecutionSpace(), first_view, second_view,
|
second_view, init_value, args...);
|
||||||
init_value, std::forward<Args>(args)...);
|
const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view,
|
||||||
const auto r6 =
|
second_view, init_value, args...);
|
||||||
KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, second_view,
|
|
||||||
init_value, std::forward<Args>(args)...);
|
|
||||||
|
|
||||||
ASSERT_EQ(r3, result_value);
|
ASSERT_EQ(r3, result_value);
|
||||||
ASSERT_EQ(r4, result_value);
|
ASSERT_EQ(r4, result_value);
|
||||||
@ -363,32 +360,30 @@ template <class ExecutionSpace, class ViewType, class ValueType, class... Args>
|
|||||||
void run_and_check_transform_reduce_overloadB(ViewType view,
|
void run_and_check_transform_reduce_overloadB(ViewType view,
|
||||||
ValueType init_value,
|
ValueType init_value,
|
||||||
ValueType result_value,
|
ValueType result_value,
|
||||||
Args&&... args) {
|
Args const&... args) {
|
||||||
// trivial
|
// trivial
|
||||||
const auto r1 =
|
const auto r1 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(view),
|
||||||
KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), KE::cbegin(view),
|
KE::cbegin(view), init_value, args...);
|
||||||
init_value, std::forward<Args>(args)...);
|
|
||||||
|
|
||||||
const auto r2 = KE::transform_reduce("MYLABEL", ExecutionSpace(),
|
const auto r2 =
|
||||||
KE::cbegin(view), KE::cbegin(view),
|
KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view),
|
||||||
init_value, std::forward<Args>(args)...);
|
KE::cbegin(view), init_value, args...);
|
||||||
|
|
||||||
ASSERT_EQ(r1, init_value);
|
ASSERT_EQ(r1, init_value);
|
||||||
ASSERT_EQ(r2, init_value);
|
ASSERT_EQ(r2, init_value);
|
||||||
|
|
||||||
// non trivial
|
// non trivial
|
||||||
const auto r3 =
|
const auto r3 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(view),
|
||||||
KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view),
|
KE::cend(view), init_value, args...);
|
||||||
init_value, std::forward<Args>(args)...);
|
|
||||||
|
|
||||||
const auto r4 = KE::transform_reduce("MYLABEL", ExecutionSpace(),
|
const auto r4 =
|
||||||
KE::cbegin(view), KE::cend(view),
|
KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view),
|
||||||
init_value, std::forward<Args>(args)...);
|
KE::cend(view), init_value, args...);
|
||||||
const auto r5 = KE::transform_reduce(ExecutionSpace(), view, init_value,
|
const auto r5 =
|
||||||
std::forward<Args>(args)...);
|
KE::transform_reduce(ExecutionSpace(), view, init_value, args...);
|
||||||
|
|
||||||
const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), view,
|
const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), view,
|
||||||
init_value, std::forward<Args>(args)...);
|
init_value, args...);
|
||||||
|
|
||||||
ASSERT_EQ(r3, result_value);
|
ASSERT_EQ(r3, result_value);
|
||||||
ASSERT_EQ(r4, result_value);
|
ASSERT_EQ(r4, result_value);
|
||||||
|
|||||||
@ -196,7 +196,7 @@ void run_single_scenario(const InfoType& scenario_info,
|
|||||||
// create host copy BEFORE rotate or view will be modified
|
// create host copy BEFORE rotate or view will be modified
|
||||||
auto view_h = create_host_space_copy(view);
|
auto view_h = create_host_space_copy(view);
|
||||||
auto rit = KE::rotate(exespace(), view, rotation_point);
|
auto rit = KE::rotate(exespace(), view, rotation_point);
|
||||||
// verify_data(rit, view, view_h, rotation_point);
|
verify_data(rit, view, view_h, rotation_point);
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|||||||
@ -191,6 +191,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
|||||||
ASSERT_EQ(stdDistance, distancesView_h(i));
|
ASSERT_EQ(stdDistance, distancesView_h(i));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -217,6 +217,7 @@ void test_A(const bool ensureAdjacentFindCanFind, std::size_t numTeams,
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -244,6 +244,7 @@ void test_A(const bool viewsAreEqual, std::size_t numTeams, std::size_t numCols,
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -224,6 +224,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef exclusive_scan
|
#undef exclusive_scan
|
||||||
|
|||||||
@ -227,6 +227,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams,
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sequencesExist) {
|
if (sequencesExist) {
|
||||||
|
|||||||
@ -244,6 +244,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams,
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -57,14 +57,7 @@ struct TestFunctorA {
|
|||||||
const auto myRowIndex = member.league_rank();
|
const auto myRowIndex = member.league_rank();
|
||||||
auto myRowViewFrom = Kokkos::subview(m_dataView, myRowIndex, Kokkos::ALL());
|
auto myRowViewFrom = Kokkos::subview(m_dataView, myRowIndex, Kokkos::ALL());
|
||||||
const auto val = m_greaterThanValuesView(myRowIndex);
|
const auto val = m_greaterThanValuesView(myRowIndex);
|
||||||
// FIXME_INTEL
|
|
||||||
#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL)
|
|
||||||
GreaterEqualFunctor<
|
|
||||||
typename GreaterThanValuesViewType::non_const_value_type>
|
|
||||||
unaryPred{val};
|
|
||||||
#else
|
|
||||||
GreaterEqualFunctor unaryPred{val};
|
GreaterEqualFunctor unaryPred{val};
|
||||||
#endif
|
|
||||||
ptrdiff_t resultDist = 0;
|
ptrdiff_t resultDist = 0;
|
||||||
|
|
||||||
switch (m_apiPick) {
|
switch (m_apiPick) {
|
||||||
@ -185,12 +178,7 @@ void test_A(const bool predicatesReturnTrue, std::size_t numTeams,
|
|||||||
const auto rowFromBegin = KE::cbegin(rowFrom);
|
const auto rowFromBegin = KE::cbegin(rowFrom);
|
||||||
const auto rowFromEnd = KE::cend(rowFrom);
|
const auto rowFromEnd = KE::cend(rowFrom);
|
||||||
const auto val = greaterEqualValuesView_h(i);
|
const auto val = greaterEqualValuesView_h(i);
|
||||||
// FIXME_INTEL
|
|
||||||
#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL)
|
|
||||||
const GreaterEqualFunctor<ValueType> unaryPred{val};
|
|
||||||
#else
|
|
||||||
const GreaterEqualFunctor unaryPred{val};
|
const GreaterEqualFunctor unaryPred{val};
|
||||||
#endif
|
|
||||||
|
|
||||||
auto it = std::find_if(rowFromBegin, rowFromEnd, unaryPred);
|
auto it = std::find_if(rowFromBegin, rowFromEnd, unaryPred);
|
||||||
|
|
||||||
|
|||||||
@ -57,14 +57,7 @@ struct TestFunctorA {
|
|||||||
const auto myRowIndex = member.league_rank();
|
const auto myRowIndex = member.league_rank();
|
||||||
auto myRowViewFrom = Kokkos::subview(m_dataView, myRowIndex, Kokkos::ALL());
|
auto myRowViewFrom = Kokkos::subview(m_dataView, myRowIndex, Kokkos::ALL());
|
||||||
const auto val = m_greaterThanValuesView(myRowIndex);
|
const auto val = m_greaterThanValuesView(myRowIndex);
|
||||||
// FIXME_INTEL
|
|
||||||
#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL)
|
|
||||||
GreaterEqualFunctor<
|
|
||||||
typename GreaterThanValuesViewType::non_const_value_type>
|
|
||||||
unaryPred{val};
|
|
||||||
#else
|
|
||||||
GreaterEqualFunctor unaryPred{val};
|
GreaterEqualFunctor unaryPred{val};
|
||||||
#endif
|
|
||||||
ptrdiff_t resultDist = 0;
|
ptrdiff_t resultDist = 0;
|
||||||
|
|
||||||
switch (m_apiPick) {
|
switch (m_apiPick) {
|
||||||
@ -180,12 +173,7 @@ void test_A(const bool predicatesReturnTrue, std::size_t numTeams,
|
|||||||
const auto rowFromBegin = KE::cbegin(rowFrom);
|
const auto rowFromBegin = KE::cbegin(rowFrom);
|
||||||
const auto rowFromEnd = KE::cend(rowFrom);
|
const auto rowFromEnd = KE::cend(rowFrom);
|
||||||
const auto val = greaterEqualValuesView_h(i);
|
const auto val = greaterEqualValuesView_h(i);
|
||||||
// FIXME_INTEL
|
|
||||||
#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL)
|
|
||||||
const GreaterEqualFunctor<ValueType> unaryPred{val};
|
|
||||||
#else
|
|
||||||
const GreaterEqualFunctor unaryPred{val};
|
const GreaterEqualFunctor unaryPred{val};
|
||||||
#endif
|
|
||||||
|
|
||||||
auto it = std::find_if_not(rowFromBegin, rowFromEnd, unaryPred);
|
auto it = std::find_if_not(rowFromBegin, rowFromEnd, unaryPred);
|
||||||
|
|
||||||
|
|||||||
@ -253,6 +253,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef inclusive_scan
|
#undef inclusive_scan
|
||||||
|
|||||||
@ -245,6 +245,7 @@ void test_A(const TestCaseType testCase, std::size_t numTeams,
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -249,6 +249,7 @@ void test_A(const bool viewsAreEqual, std::size_t numTeams, std::size_t numCols,
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -242,6 +242,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef reduce
|
#undef reduce
|
||||||
|
|||||||
@ -243,6 +243,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams,
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -258,6 +258,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams,
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -203,6 +203,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
|||||||
ASSERT_EQ(stdDistance, distancesView_h(i));
|
ASSERT_EQ(stdDistance, distancesView_h(i));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef transform_exclusive_scan
|
#undef transform_exclusive_scan
|
||||||
|
|||||||
@ -240,6 +240,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#undef transform_inclusive_scan
|
#undef transform_inclusive_scan
|
||||||
|
|||||||
@ -293,6 +293,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
default: Kokkos::abort("unreachable");
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef transform_reduce
|
#undef transform_reduce
|
||||||
|
|||||||
@ -344,8 +344,7 @@ TEST(std_algorithms_numeric_ops_test, transform_exclusive_scan_functor) {
|
|||||||
using view_type = Kokkos::View<int*, exespace>;
|
using view_type = Kokkos::View<int*, exespace>;
|
||||||
view_type dummy_view("dummy_view", 0);
|
view_type dummy_view("dummy_view", 0);
|
||||||
using unary_op_type =
|
using unary_op_type =
|
||||||
Kokkos::Experimental::Impl::StdNumericScanIdentityReferenceUnaryFunctor<
|
Kokkos::Experimental::Impl::StdNumericScanIdentityReferenceUnaryFunctor;
|
||||||
int>;
|
|
||||||
using functor_type =
|
using functor_type =
|
||||||
Kokkos::Experimental::Impl::TransformExclusiveScanFunctorWithValueWrapper<
|
Kokkos::Experimental::Impl::TransformExclusiveScanFunctorWithValueWrapper<
|
||||||
exespace, int, int, view_type, view_type, MultiplyFunctor<int>,
|
exespace, int, int, view_type, view_type, MultiplyFunctor<int>,
|
||||||
|
|||||||
@ -390,8 +390,7 @@ TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan_functor) {
|
|||||||
int dummy = 0;
|
int dummy = 0;
|
||||||
using view_type = Kokkos::View<int*, exespace>;
|
using view_type = Kokkos::View<int*, exespace>;
|
||||||
view_type dummy_view("dummy_view", 0);
|
view_type dummy_view("dummy_view", 0);
|
||||||
using unary_op_type =
|
using unary_op_type = KE::Impl::StdNumericScanIdentityReferenceUnaryFunctor;
|
||||||
KE::Impl::StdNumericScanIdentityReferenceUnaryFunctor<int>;
|
|
||||||
{
|
{
|
||||||
using functor_type =
|
using functor_type =
|
||||||
KE::Impl::ExeSpaceTransformInclusiveScanNoInitValueFunctor<
|
KE::Impl::ExeSpaceTransformInclusiveScanNoInitValueFunctor<
|
||||||
|
|||||||
@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda
|
|||||||
KOKKOS_CUDA_OPTIONS=enable_lambda
|
KOKKOS_CUDA_OPTIONS=enable_lambda
|
||||||
KOKKOS_ARCH = "SNB,Volta70"
|
KOKKOS_ARCH = "SNB,Volta70"
|
||||||
|
|
||||||
|
KOKKOS_USE_DEPRECATED_MAKEFILES=1
|
||||||
|
|
||||||
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
||||||
|
|
||||||
|
|||||||
@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda
|
|||||||
KOKKOS_CUDA_OPTIONS=enable_lambda
|
KOKKOS_CUDA_OPTIONS=enable_lambda
|
||||||
KOKKOS_ARCH = "SNB,Volta70"
|
KOKKOS_ARCH = "SNB,Volta70"
|
||||||
|
|
||||||
|
KOKKOS_USE_DEPRECATED_MAKEFILES=1
|
||||||
|
|
||||||
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
||||||
|
|
||||||
|
|||||||
@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda
|
|||||||
KOKKOS_CUDA_OPTIONS=enable_lambda
|
KOKKOS_CUDA_OPTIONS=enable_lambda
|
||||||
KOKKOS_ARCH = "SNB,Volta70"
|
KOKKOS_ARCH = "SNB,Volta70"
|
||||||
|
|
||||||
|
KOKKOS_USE_DEPRECATED_MAKEFILES=1
|
||||||
|
|
||||||
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
||||||
|
|
||||||
|
|||||||
@ -37,7 +37,7 @@
|
|||||||
|
|
||||||
template <int V>
|
template <int V>
|
||||||
struct TestFunctor {
|
struct TestFunctor {
|
||||||
double values[V];
|
double values[V] = {};
|
||||||
Kokkos::View<double*> a;
|
Kokkos::View<double*> a;
|
||||||
int K;
|
int K;
|
||||||
TestFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
|
TestFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
|
||||||
@ -50,7 +50,7 @@ struct TestFunctor {
|
|||||||
|
|
||||||
template <int V>
|
template <int V>
|
||||||
struct TestRFunctor {
|
struct TestRFunctor {
|
||||||
double values[V];
|
double values[V] = {};
|
||||||
Kokkos::View<double*> a;
|
Kokkos::View<double*> a;
|
||||||
int K;
|
int K;
|
||||||
TestRFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
|
TestRFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
|
||||||
@ -247,12 +247,15 @@ int main(int argc, char* argv[]) {
|
|||||||
// anything that doesn't start with --
|
// anything that doesn't start with --
|
||||||
if (arg.size() < 2 ||
|
if (arg.size() < 2 ||
|
||||||
(arg.size() >= 2 && arg[0] != '-' && arg[1] != '-')) {
|
(arg.size() >= 2 && arg[0] != '-' && arg[1] != '-')) {
|
||||||
|
// signing off that arg.data() is null terminated
|
||||||
|
// NOLINTBEGIN(bugprone-suspicious-stringview-data-usage)
|
||||||
if (i == 1)
|
if (i == 1)
|
||||||
N = atoi(arg.data());
|
N = atoi(arg.data());
|
||||||
else if (i == 2)
|
else if (i == 2)
|
||||||
M = atoi(arg.data());
|
M = atoi(arg.data());
|
||||||
else if (i == 3)
|
else if (i == 3)
|
||||||
K = atoi(arg.data());
|
K = atoi(arg.data());
|
||||||
|
// NOLINTEND(bugprone-suspicious-stringview-data-usage)
|
||||||
else {
|
else {
|
||||||
Kokkos::abort("unexpected argument!");
|
Kokkos::abort("unexpected argument!");
|
||||||
}
|
}
|
||||||
|
|||||||
@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda
|
|||||||
KOKKOS_CUDA_OPTIONS=enable_lambda
|
KOKKOS_CUDA_OPTIONS=enable_lambda
|
||||||
KOKKOS_ARCH = "SNB,Volta70"
|
KOKKOS_ARCH = "SNB,Volta70"
|
||||||
|
|
||||||
|
KOKKOS_USE_DEPRECATED_MAKEFILES=1
|
||||||
|
|
||||||
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
||||||
|
|
||||||
|
|||||||
@ -120,11 +120,12 @@ int main(int argc, char* argv[]) {
|
|||||||
// view appropriately for test and should obey first-touch etc Second call to
|
// view appropriately for test and should obey first-touch etc Second call to
|
||||||
// test is the one we actually care about and time
|
// test is the one we actually care about and time
|
||||||
view_type_1d v_1(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_1"),
|
view_type_1d v_1(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_1"),
|
||||||
team_range * team_size);
|
static_cast<size_t>(team_range) * team_size);
|
||||||
view_type_2d v_2(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_2"),
|
view_type_2d v_2(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_2"),
|
||||||
team_range * team_size, thread_range);
|
static_cast<size_t>(team_range) * team_size, thread_range);
|
||||||
view_type_3d v_3(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_3"),
|
view_type_3d v_3(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_3"),
|
||||||
team_range * team_size, thread_range, vector_range);
|
static_cast<size_t>(team_range) * team_size, thread_range,
|
||||||
|
vector_range);
|
||||||
|
|
||||||
double result_computed = 0.0;
|
double result_computed = 0.0;
|
||||||
double result_expect = 0.0;
|
double result_expect = 0.0;
|
||||||
|
|||||||
@ -367,7 +367,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
|||||||
// parallel_for RangePolicy: range = team_size*team_range
|
// parallel_for RangePolicy: range = team_size*team_range
|
||||||
if (test_type == 300) {
|
if (test_type == 300) {
|
||||||
Kokkos::parallel_for(
|
Kokkos::parallel_for(
|
||||||
"300 outer for", team_size * team_range,
|
"300 outer for", static_cast<size_t>(team_size) * team_range,
|
||||||
KOKKOS_LAMBDA(const int idx) {
|
KOKKOS_LAMBDA(const int idx) {
|
||||||
v1(idx) = idx;
|
v1(idx) = idx;
|
||||||
// prevent compiler from optimizing away the loop
|
// prevent compiler from optimizing away the loop
|
||||||
@ -376,14 +376,15 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
|||||||
// parallel_reduce RangePolicy: range = team_size*team_range
|
// parallel_reduce RangePolicy: range = team_size*team_range
|
||||||
if (test_type == 400) {
|
if (test_type == 400) {
|
||||||
Kokkos::parallel_reduce(
|
Kokkos::parallel_reduce(
|
||||||
"400 outer reduce", team_size * team_range,
|
"400 outer reduce", static_cast<size_t>(team_size) * team_range,
|
||||||
KOKKOS_LAMBDA(const int idx, double& val) { val += idx; }, result);
|
KOKKOS_LAMBDA(const int idx, double& val) { val += idx; }, result);
|
||||||
result_expect =
|
result_expect =
|
||||||
0.5 * (team_size * team_range) * (team_size * team_range - 1);
|
0.5 * (team_size * team_range) * (team_size * team_range - 1);
|
||||||
}
|
}
|
||||||
// parallel_scan RangePolicy: range = team_size*team_range
|
// parallel_scan RangePolicy: range = team_size*team_range
|
||||||
if (test_type == 500) {
|
if (test_type == 500) {
|
||||||
Kokkos::parallel_scan("500 outer scan", team_size * team_range,
|
Kokkos::parallel_scan("500 outer scan",
|
||||||
|
static_cast<size_t>(team_size) * team_range,
|
||||||
ParallelScanFunctor<ViewType1>(v1)
|
ParallelScanFunctor<ViewType1>(v1)
|
||||||
#if 0
|
#if 0
|
||||||
// This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation
|
// This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation
|
||||||
|
|||||||
@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda
|
|||||||
KOKKOS_CUDA_OPTIONS=enable_lambda
|
KOKKOS_CUDA_OPTIONS=enable_lambda
|
||||||
KOKKOS_ARCH = "SNB,Volta70"
|
KOKKOS_ARCH = "SNB,Volta70"
|
||||||
|
|
||||||
|
KOKKOS_USE_DEPRECATED_MAKEFILES=1
|
||||||
|
|
||||||
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
KOKKOS_DEVICES=Serial
|
KOKKOS_DEVICES=Serial
|
||||||
KOKKOS_ARCH = ""
|
KOKKOS_ARCH = ""
|
||||||
|
|
||||||
|
KOKKOS_USE_DEPRECATED_MAKEFILES=1
|
||||||
|
|
||||||
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST))))
|
||||||
|
|
||||||
|
|||||||
@ -317,7 +317,7 @@ do
|
|||||||
# End of Werror handling
|
# End of Werror handling
|
||||||
#Handle unsupported standard flags
|
#Handle unsupported standard flags
|
||||||
--std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a)
|
--std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a)
|
||||||
fallback_std_flag="-std=c++14"
|
fallback_std_flag="-std=c++17"
|
||||||
# this is hopefully just occurring in a downstream project during CMake feature tests
|
# this is hopefully just occurring in a downstream project during CMake feature tests
|
||||||
# we really have no choice here but to accept the flag and change to an accepted C++ standard
|
# we really have no choice here but to accept the flag and change to an accepted C++ standard
|
||||||
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++17 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
|
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++17 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
|
||||||
@ -346,35 +346,17 @@ do
|
|||||||
# NVCC only has C++20 from version 12 on
|
# NVCC only has C++20 from version 12 on
|
||||||
cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]})
|
cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]})
|
||||||
if [ ${cuda_main_version} -lt 12 ]; then
|
if [ ${cuda_main_version} -lt 12 ]; then
|
||||||
fallback_std_flag="-std=c++14"
|
fallback_std_flag="-std=c++17"
|
||||||
# this is hopefully just occurring in a downstream project during CMake feature tests
|
# this is hopefully just occurring in a downstream project during CMake feature tests
|
||||||
# we really have no choice here but to accept the flag and change to an accepted C++ standard
|
# we really have no choice here but to accept the flag and change to an accepted C++ standard
|
||||||
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
|
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++17 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
|
||||||
std_flag=$fallback_std_flag
|
std_flag=$fallback_std_flag
|
||||||
else
|
else
|
||||||
std_flag=$1
|
std_flag=$1
|
||||||
fi
|
fi
|
||||||
shared_args="$shared_args $std_flag"
|
shared_args="$shared_args $std_flag"
|
||||||
;;
|
;;
|
||||||
--std=c++17|-std=c++17)
|
--std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++17|-std=c++17)
|
||||||
if [ -n "$std_flag" ]; then
|
|
||||||
warn_std_flag
|
|
||||||
shared_args=${shared_args/ $std_flag/}
|
|
||||||
fi
|
|
||||||
# NVCC only has C++17 from version 11 on
|
|
||||||
cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]})
|
|
||||||
if [ ${cuda_main_version} -lt 11 ]; then
|
|
||||||
fallback_std_flag="-std=c++14"
|
|
||||||
# this is hopefully just occurring in a downstream project during CMake feature tests
|
|
||||||
# we really have no choice here but to accept the flag and change to an accepted C++ standard
|
|
||||||
echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
|
|
||||||
std_flag=$fallback_std_flag
|
|
||||||
else
|
|
||||||
std_flag=$1
|
|
||||||
fi
|
|
||||||
shared_args="$shared_args $std_flag"
|
|
||||||
;;
|
|
||||||
--std=c++11|-std=c++11|--std=c++14|-std=c++14)
|
|
||||||
if [ -n "$std_flag" ]; then
|
if [ -n "$std_flag" ]; then
|
||||||
warn_std_flag
|
warn_std_flag
|
||||||
shared_args=${shared_args/ $std_flag/}
|
shared_args=${shared_args/ $std_flag/}
|
||||||
@ -500,6 +482,20 @@ do
|
|||||||
xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
|
xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
|
||||||
host_linker_args="$host_linker_args ${1:4:${#1}}"
|
host_linker_args="$host_linker_args ${1:4:${#1}}"
|
||||||
;;
|
;;
|
||||||
|
#Handle host assembler options
|
||||||
|
-Wa,*)
|
||||||
|
#To pass the -Wa options to the host compiler via -Xcompiler it is necessary
|
||||||
|
#to use '\\,' for each comma in the options. As users might already add escapes
|
||||||
|
#to the comma by themselves, the escapes are first removed and then only the
|
||||||
|
#required number of \ are added back.
|
||||||
|
xcompiler_args_wa=$(echo -e "$1" | sed -E 's/\\\+,/,/g' | sed -E 's/,/\\\\\\\,/g')
|
||||||
|
if [ $first_xcompiler_arg -eq 1 ]; then
|
||||||
|
xcompiler_args="$xcompiler_args_wa"
|
||||||
|
first_xcompiler_arg=0
|
||||||
|
else
|
||||||
|
xcompiler_args="$xcompiler_args,$xcompiler_args_wa"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
#Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
|
#Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
|
||||||
*.a|*.so|*.o|*.obj)
|
*.a|*.so|*.o|*.obj)
|
||||||
object_files="$object_files $1"
|
object_files="$object_files $1"
|
||||||
|
|||||||
@ -2,46 +2,49 @@
|
|||||||
# loaded by include() and find_package() commands except when invoked with
|
# loaded by include() and find_package() commands except when invoked with
|
||||||
# the NO_POLICY_SCOPE option
|
# the NO_POLICY_SCOPE option
|
||||||
# CMP0057 + NEW -> IN_LIST operator in IF(...)
|
# CMP0057 + NEW -> IN_LIST operator in IF(...)
|
||||||
CMAKE_POLICY(SET CMP0057 NEW)
|
cmake_policy(SET CMP0057 NEW)
|
||||||
|
|
||||||
# Compute paths
|
# Compute paths
|
||||||
@PACKAGE_INIT@
|
@PACKAGE_INIT@
|
||||||
|
|
||||||
#Find dependencies
|
#Find dependencies
|
||||||
INCLUDE(CMakeFindDependencyMacro)
|
include(CMakeFindDependencyMacro)
|
||||||
|
|
||||||
#This needs to go above the KokkosTargets in case
|
#This needs to go above the KokkosTargets in case
|
||||||
#the Kokkos targets depend in some way on the TPL imports
|
#the Kokkos targets depend in some way on the TPL imports
|
||||||
@KOKKOS_TPL_EXPORTS@
|
@KOKKOS_TPL_EXPORTS@
|
||||||
|
|
||||||
GET_FILENAME_COMPONENT(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
get_filename_component(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
||||||
INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake")
|
include("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake")
|
||||||
INCLUDE("${Kokkos_CMAKE_DIR}/KokkosConfigCommon.cmake")
|
include("${Kokkos_CMAKE_DIR}/KokkosConfigCommon.cmake")
|
||||||
UNSET(Kokkos_CMAKE_DIR)
|
unset(Kokkos_CMAKE_DIR)
|
||||||
|
|
||||||
# check for conflicts
|
# check for conflicts
|
||||||
IF("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS AND
|
if("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS AND "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS)
|
||||||
"separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS)
|
message(STATUS "'launch_compiler' implies global redirection of targets depending on Kokkos to appropriate compiler.")
|
||||||
MESSAGE(STATUS "'launch_compiler' implies global redirection of targets depending on Kokkos to appropriate compiler.")
|
message(
|
||||||
MESSAGE(STATUS "'separable_compilation' implies explicitly defining where redirection occurs via 'kokkos_compilation(PROJECT|TARGET|SOURCE|DIRECTORY ...)'")
|
STATUS
|
||||||
MESSAGE(FATAL_ERROR "Conflicting COMPONENTS: 'launch_compiler' and 'separable_compilation'")
|
"'separable_compilation' implies explicitly defining where redirection occurs via 'kokkos_compilation(PROJECT|TARGET|SOURCE|DIRECTORY ...)'"
|
||||||
ENDIF()
|
)
|
||||||
|
message(FATAL_ERROR "Conflicting COMPONENTS: 'launch_compiler' and 'separable_compilation'")
|
||||||
|
endif()
|
||||||
|
|
||||||
IF("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS)
|
if("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS)
|
||||||
#
|
#
|
||||||
# if find_package(Kokkos COMPONENTS launch_compiler) then rely on the
|
# if find_package(Kokkos COMPONENTS launch_compiler) then rely on the
|
||||||
# RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK to always redirect to the
|
# RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK to always redirect to the
|
||||||
# appropriate compiler for Kokkos
|
# appropriate compiler for Kokkos
|
||||||
#
|
#
|
||||||
|
|
||||||
MESSAGE(STATUS "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos")
|
message(
|
||||||
kokkos_compilation(
|
STATUS
|
||||||
GLOBAL
|
"kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos"
|
||||||
CHECK_CUDA_COMPILES)
|
)
|
||||||
|
kokkos_compilation(GLOBAL CHECK_CUDA_COMPILES)
|
||||||
|
|
||||||
ELSEIF(@Kokkos_ENABLE_CUDA@
|
elseif(@Kokkos_ENABLE_CUDA@ AND NOT @KOKKOS_COMPILE_LANGUAGE@ STREQUAL CUDA AND NOT "separable_compilation" IN_LIST
|
||||||
AND NOT @KOKKOS_COMPILE_LANGUAGE@ STREQUAL CUDA
|
Kokkos_FIND_COMPONENTS
|
||||||
AND NOT "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS)
|
)
|
||||||
#
|
#
|
||||||
# if CUDA was enabled, the compilation language was not set to CUDA, and separable compilation was not
|
# if CUDA was enabled, the compilation language was not set to CUDA, and separable compilation was not
|
||||||
# specified, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and
|
# specified, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and
|
||||||
@ -54,13 +57,16 @@ ELSEIF(@Kokkos_ENABLE_CUDA@
|
|||||||
kokkos_compiler_is_nvcc(IS_NVCC ${CMAKE_CXX_COMPILER})
|
kokkos_compiler_is_nvcc(IS_NVCC ${CMAKE_CXX_COMPILER})
|
||||||
|
|
||||||
# if not nvcc_wrapper and Kokkos_LAUNCH_COMPILER was not set to OFF
|
# if not nvcc_wrapper and Kokkos_LAUNCH_COMPILER was not set to OFF
|
||||||
IF(NOT IS_NVCC AND (NOT DEFINED Kokkos_LAUNCH_COMPILER OR Kokkos_LAUNCH_COMPILER))
|
if(NOT IS_NVCC AND (NOT DEFINED Kokkos_LAUNCH_COMPILER OR Kokkos_LAUNCH_COMPILER))
|
||||||
MESSAGE(STATUS "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos")
|
message(
|
||||||
|
STATUS
|
||||||
|
"kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos"
|
||||||
|
)
|
||||||
kokkos_compilation(GLOBAL)
|
kokkos_compilation(GLOBAL)
|
||||||
ENDIF()
|
endif()
|
||||||
|
|
||||||
# be mindful of the environment, pollution is bad
|
# be mindful of the environment, pollution is bad
|
||||||
UNSET(IS_NVCC)
|
unset(IS_NVCC)
|
||||||
ENDIF()
|
endif()
|
||||||
|
|
||||||
set(Kokkos_COMPILE_LANGUAGE @KOKKOS_COMPILE_LANGUAGE@)
|
set(Kokkos_COMPILE_LANGUAGE @KOKKOS_COMPILE_LANGUAGE@)
|
||||||
|
|||||||
@ -1,67 +1,67 @@
|
|||||||
SET(Kokkos_DEVICES @KOKKOS_ENABLED_DEVICES@)
|
set(Kokkos_DEVICES @KOKKOS_ENABLED_DEVICES@)
|
||||||
SET(Kokkos_OPTIONS @KOKKOS_ENABLED_OPTIONS@)
|
set(Kokkos_OPTIONS @KOKKOS_ENABLED_OPTIONS@)
|
||||||
SET(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@)
|
set(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@)
|
||||||
SET(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@)
|
set(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@)
|
||||||
SET(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@")
|
set(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@")
|
||||||
SET(Kokkos_CXX_COMPILER_ID "@KOKKOS_CXX_COMPILER_ID@")
|
set(Kokkos_CXX_COMPILER_ID "@KOKKOS_CXX_COMPILER_ID@")
|
||||||
SET(Kokkos_CXX_COMPILER_VERSION "@KOKKOS_CXX_COMPILER_VERSION@")
|
set(Kokkos_CXX_COMPILER_VERSION "@KOKKOS_CXX_COMPILER_VERSION@")
|
||||||
SET(Kokkos_CXX_STANDARD @KOKKOS_CXX_STANDARD@)
|
set(Kokkos_CXX_STANDARD @KOKKOS_CXX_STANDARD@)
|
||||||
|
|
||||||
# Required to be a TriBITS-compliant external package
|
# Required to be a TriBITS-compliant external package
|
||||||
IF(NOT TARGET Kokkos::all_libs)
|
if(NOT TARGET Kokkos::all_libs)
|
||||||
# CMake Error at <prefix>/lib/cmake/Kokkos/KokkosConfigCommon.cmake:10 (ADD_LIBRARY):
|
# CMake Error at <prefix>/lib/cmake/Kokkos/KokkosConfigCommon.cmake:10 (ADD_LIBRARY):
|
||||||
# ADD_LIBRARY cannot create ALIAS target "Kokkos::all_libs" because target
|
# ADD_LIBRARY cannot create ALIAS target "Kokkos::all_libs" because target
|
||||||
# "Kokkos::kokkos" is imported but not globally visible.
|
# "Kokkos::kokkos" is imported but not globally visible.
|
||||||
IF(CMAKE_VERSION VERSION_LESS "3.18")
|
if(CMAKE_VERSION VERSION_LESS "3.18")
|
||||||
SET_TARGET_PROPERTIES(Kokkos::kokkos PROPERTIES IMPORTED_GLOBAL ON)
|
set_target_properties(Kokkos::kokkos PROPERTIES IMPORTED_GLOBAL ON)
|
||||||
ENDIF()
|
endif()
|
||||||
ADD_LIBRARY(Kokkos::all_libs ALIAS Kokkos::kokkos)
|
add_library(Kokkos::all_libs ALIAS Kokkos::kokkos)
|
||||||
ENDIF()
|
endif()
|
||||||
|
|
||||||
# Export Kokkos_ENABLE_<BACKEND> for each backend that was enabled.
|
# Export Kokkos_ENABLE_<BACKEND> for each backend that was enabled.
|
||||||
# NOTE: "Devices" is a little bit of a misnomer here. These are really
|
# NOTE: "Devices" is a little bit of a misnomer here. These are really
|
||||||
# backends, e.g. Kokkos_ENABLE_OPENMP, Kokkos_ENABLE_CUDA, Kokkos_ENABLE_HIP,
|
# backends, e.g. Kokkos_ENABLE_OPENMP, Kokkos_ENABLE_CUDA, Kokkos_ENABLE_HIP,
|
||||||
# or Kokkos_ENABLE_SYCL.
|
# or Kokkos_ENABLE_SYCL.
|
||||||
FOREACH(DEV ${Kokkos_DEVICES})
|
foreach(DEV ${Kokkos_DEVICES})
|
||||||
SET(Kokkos_ENABLE_${DEV} ON)
|
set(Kokkos_ENABLE_${DEV} ON)
|
||||||
ENDFOREACH()
|
endforeach()
|
||||||
# Export relevant Kokkos_ENABLE<OPTION> variables, e.g.
|
# Export relevant Kokkos_ENABLE<OPTION> variables, e.g.
|
||||||
# Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE, Kokkos_ENABLE_DEBUG, etc.
|
# Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE, Kokkos_ENABLE_DEBUG, etc.
|
||||||
FOREACH(OPT ${Kokkos_OPTIONS})
|
foreach(OPT ${Kokkos_OPTIONS})
|
||||||
SET(Kokkos_ENABLE_${OPT} ON)
|
set(Kokkos_ENABLE_${OPT} ON)
|
||||||
ENDFOREACH()
|
endforeach()
|
||||||
|
|
||||||
IF(Kokkos_ENABLE_CUDA)
|
if(Kokkos_ENABLE_CUDA)
|
||||||
SET(Kokkos_CUDA_ARCHITECTURES @KOKKOS_CUDA_ARCHITECTURES@)
|
set(Kokkos_CUDA_ARCHITECTURES @KOKKOS_CUDA_ARCHITECTURES@)
|
||||||
ENDIF()
|
endif()
|
||||||
|
|
||||||
IF(Kokkos_ENABLE_HIP)
|
if(Kokkos_ENABLE_HIP)
|
||||||
SET(Kokkos_HIP_ARCHITECTURES @KOKKOS_HIP_ARCHITECTURES@)
|
set(Kokkos_HIP_ARCHITECTURES @KOKKOS_HIP_ARCHITECTURES@)
|
||||||
ENDIF()
|
endif()
|
||||||
|
|
||||||
IF(NOT Kokkos_FIND_QUIETLY)
|
if(NOT Kokkos_FIND_QUIETLY)
|
||||||
MESSAGE(STATUS "Enabled Kokkos devices: ${Kokkos_DEVICES}")
|
message(STATUS "Enabled Kokkos devices: ${Kokkos_DEVICES}")
|
||||||
ENDIF()
|
endif()
|
||||||
|
|
||||||
IF (Kokkos_ENABLE_CUDA)
|
if(Kokkos_ENABLE_CUDA)
|
||||||
# If we are building CUDA, we have tricked CMake because we declare a CXX project
|
# If we are building CUDA, we have tricked CMake because we declare a CXX project
|
||||||
# If the default C++ standard for a given compiler matches the requested
|
# If the default C++ standard for a given compiler matches the requested
|
||||||
# standard, then CMake just omits the -std flag in later versions of CMake
|
# standard, then CMake just omits the -std flag in later versions of CMake
|
||||||
# This breaks CUDA compilation (CUDA compiler can have a different default
|
# This breaks CUDA compilation (CUDA compiler can have a different default
|
||||||
# -std then the underlying host compiler by itself). Setting this variable
|
# -std then the underlying host compiler by itself). Setting this variable
|
||||||
# forces CMake to always add the -std flag even if it thinks it doesn't need it
|
# forces CMake to always add the -std flag even if it thinks it doesn't need it
|
||||||
SET(CMAKE_CXX_STANDARD_DEFAULT 98 CACHE INTERNAL "" FORCE)
|
set(CMAKE_CXX_STANDARD_DEFAULT 98 CACHE INTERNAL "" FORCE)
|
||||||
ENDIF()
|
endif()
|
||||||
|
|
||||||
SET(KOKKOS_USE_CXX_EXTENSIONS @KOKKOS_USE_CXX_EXTENSIONS@)
|
set(KOKKOS_USE_CXX_EXTENSIONS @KOKKOS_USE_CXX_EXTENSIONS@)
|
||||||
IF (NOT DEFINED CMAKE_CXX_EXTENSIONS OR CMAKE_CXX_EXTENSIONS)
|
if(NOT DEFINED CMAKE_CXX_EXTENSIONS OR CMAKE_CXX_EXTENSIONS)
|
||||||
IF (NOT KOKKOS_USE_CXX_EXTENSIONS)
|
if(NOT KOKKOS_USE_CXX_EXTENSIONS)
|
||||||
MESSAGE(WARNING "The installed Kokkos configuration does not support CXX extensions. Forcing -DCMAKE_CXX_EXTENSIONS=Off")
|
message(
|
||||||
SET(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "" FORCE)
|
WARNING "The installed Kokkos configuration does not support CXX extensions. Forcing -DCMAKE_CXX_EXTENSIONS=Off"
|
||||||
ENDIF()
|
)
|
||||||
ENDIF()
|
set(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "" FORCE)
|
||||||
|
endif()
|
||||||
include(FindPackageHandleStandardArgs)
|
endif()
|
||||||
|
|
||||||
# This function makes sure that Kokkos was built with the requested backends
|
# This function makes sure that Kokkos was built with the requested backends
|
||||||
# and target architectures and generates a fatal error if it was not.
|
# and target architectures and generates a fatal error if it was not.
|
||||||
@ -89,30 +89,25 @@ function(kokkos_check)
|
|||||||
endforeach()
|
endforeach()
|
||||||
set(KOKKOS_CHECK_SUCCESS TRUE)
|
set(KOKKOS_CHECK_SUCCESS TRUE)
|
||||||
foreach(arg ${REQUESTED_ARGS})
|
foreach(arg ${REQUESTED_ARGS})
|
||||||
# Define variables named after the required arguments that are provided by
|
set(MISSING_OPTIONS "")
|
||||||
# the Kokkos install.
|
|
||||||
foreach(requested ${KOKKOS_CHECK_${arg}})
|
foreach(requested ${KOKKOS_CHECK_${arg}})
|
||||||
|
set(FOUND_MATCHING_OPTION FALSE)
|
||||||
foreach(provided ${Kokkos_${arg}})
|
foreach(provided ${Kokkos_${arg}})
|
||||||
STRING(TOUPPER ${requested} REQUESTED_UC)
|
string(TOUPPER ${requested} REQUESTED_UC)
|
||||||
STRING(TOUPPER ${provided} PROVIDED_UC)
|
string(TOUPPER ${provided} PROVIDED_UC)
|
||||||
if(PROVIDED_UC STREQUAL REQUESTED_UC)
|
if(PROVIDED_UC STREQUAL REQUESTED_UC)
|
||||||
string(REPLACE ";" " " ${requested} "${KOKKOS_CHECK_${arg}}")
|
set(FOUND_MATCHING_OPTION TRUE)
|
||||||
endif()
|
endif()
|
||||||
endforeach()
|
endforeach()
|
||||||
endforeach()
|
if(NOT FOUND_MATCHING_OPTION)
|
||||||
# Somewhat divert the CMake function below from its original purpose and
|
list(APPEND MISSING_OPTIONS ${requested})
|
||||||
# use it to check that there are variables defined for all required
|
|
||||||
# arguments. Success or failure messages will be displayed but we are
|
|
||||||
# responsible for signaling failure and skip the build system generation.
|
|
||||||
if (KOKKOS_CHECK_RETURN_VALUE)
|
|
||||||
set(Kokkos_${arg}_FIND_QUIETLY ON)
|
|
||||||
endif()
|
|
||||||
find_package_handle_standard_args("Kokkos_${arg}" DEFAULT_MSG
|
|
||||||
${KOKKOS_CHECK_${arg}})
|
|
||||||
if(NOT Kokkos_${arg}_FOUND)
|
|
||||||
set(KOKKOS_CHECK_SUCCESS FALSE)
|
set(KOKKOS_CHECK_SUCCESS FALSE)
|
||||||
endif()
|
endif()
|
||||||
endforeach()
|
endforeach()
|
||||||
|
if(NOT KOKKOS_CHECK_SUCCESS AND NOT KOKKOS_CHECK_RETURN_VALUE)
|
||||||
|
message(STATUS "Could NOT find Kokkos_${arg} (missing: ${MISSING_OPTIONS})")
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
if(NOT KOKKOS_CHECK_SUCCESS AND NOT KOKKOS_CHECK_RETURN_VALUE)
|
if(NOT KOKKOS_CHECK_SUCCESS AND NOT KOKKOS_CHECK_RETURN_VALUE)
|
||||||
message(FATAL_ERROR "Kokkos does NOT provide all backends and/or architectures requested")
|
message(FATAL_ERROR "Kokkos does NOT provide all backends and/or architectures requested")
|
||||||
else()
|
else()
|
||||||
@ -122,32 +117,35 @@ endfunction()
|
|||||||
|
|
||||||
# A test to check whether a downstream project set the C++ compiler to NVCC or not
|
# A test to check whether a downstream project set the C++ compiler to NVCC or not
|
||||||
# this is called only when Kokkos was installed with Kokkos_ENABLE_CUDA=ON
|
# this is called only when Kokkos was installed with Kokkos_ENABLE_CUDA=ON
|
||||||
FUNCTION(kokkos_compiler_is_nvcc VAR COMPILER)
|
function(kokkos_compiler_is_nvcc VAR COMPILER)
|
||||||
# Check if the compiler is nvcc (which really means nvcc_wrapper).
|
# Check if the compiler is nvcc (which really means nvcc_wrapper).
|
||||||
EXECUTE_PROCESS(COMMAND ${COMPILER} ${ARGN} --version
|
execute_process(
|
||||||
|
COMMAND ${COMPILER} ${ARGN} --version
|
||||||
OUTPUT_VARIABLE INTERNAL_COMPILER_VERSION
|
OUTPUT_VARIABLE INTERNAL_COMPILER_VERSION
|
||||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||||
RESULT_VARIABLE RET)
|
RESULT_VARIABLE RET
|
||||||
|
)
|
||||||
# something went wrong
|
# something went wrong
|
||||||
IF(RET GREATER 0)
|
if(RET GREATER 0)
|
||||||
SET(${VAR} false PARENT_SCOPE)
|
set(${VAR} false PARENT_SCOPE)
|
||||||
ELSE()
|
else()
|
||||||
STRING(REPLACE "\n" " - " INTERNAL_COMPILER_VERSION_ONE_LINE ${INTERNAL_COMPILER_VERSION} )
|
string(REPLACE "\n" " - " INTERNAL_COMPILER_VERSION_ONE_LINE ${INTERNAL_COMPILER_VERSION})
|
||||||
STRING(FIND ${INTERNAL_COMPILER_VERSION_ONE_LINE} "nvcc" INTERNAL_COMPILER_VERSION_CONTAINS_NVCC)
|
string(FIND ${INTERNAL_COMPILER_VERSION_ONE_LINE} "nvcc" INTERNAL_COMPILER_VERSION_CONTAINS_NVCC)
|
||||||
STRING(REGEX REPLACE "^ +" "" INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")
|
string(REGEX REPLACE "^ +" "" INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")
|
||||||
IF(${INTERNAL_COMPILER_VERSION_CONTAINS_NVCC} GREATER -1)
|
if(${INTERNAL_COMPILER_VERSION_CONTAINS_NVCC} GREATER -1)
|
||||||
SET(${VAR} true PARENT_SCOPE)
|
set(${VAR} true PARENT_SCOPE)
|
||||||
ELSE()
|
else()
|
||||||
SET(${VAR} false PARENT_SCOPE)
|
set(${VAR} false PARENT_SCOPE)
|
||||||
ENDIF()
|
endif()
|
||||||
ENDIF()
|
endif()
|
||||||
ENDFUNCTION()
|
endfunction()
|
||||||
|
|
||||||
# this function checks whether the current CXX compiler supports building CUDA
|
# this function checks whether the current CXX compiler supports building CUDA
|
||||||
FUNCTION(kokkos_cxx_compiler_cuda_test _VAR _COMPILER)
|
function(kokkos_cxx_compiler_cuda_test _VAR _COMPILER)
|
||||||
|
|
||||||
FILE(WRITE ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu
|
file(
|
||||||
"
|
WRITE ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu
|
||||||
|
"
|
||||||
#include <cuda.h>
|
#include <cuda.h>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
|
||||||
@ -171,34 +169,39 @@ int main()
|
|||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
")
|
"
|
||||||
|
)
|
||||||
|
|
||||||
# save the command for debugging
|
# save the command for debugging
|
||||||
SET(_COMMANDS "${_COMPILER} ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu")
|
set(_COMMANDS "${_COMPILER} ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu")
|
||||||
|
|
||||||
# use execute_process instead of try compile because we want to set custom compiler
|
# use execute_process instead of try compile because we want to set custom compiler
|
||||||
EXECUTE_PROCESS(COMMAND ${_COMPILER} ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu
|
execute_process(
|
||||||
|
COMMAND ${_COMPILER} ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu
|
||||||
RESULT_VARIABLE _RET
|
RESULT_VARIABLE _RET
|
||||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/compile_tests
|
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/compile_tests
|
||||||
TIMEOUT 15
|
TIMEOUT 15
|
||||||
OUTPUT_QUIET
|
OUTPUT_QUIET ERROR_QUIET
|
||||||
ERROR_QUIET)
|
)
|
||||||
|
|
||||||
IF(NOT _RET EQUAL 0)
|
if(NOT _RET EQUAL 0)
|
||||||
# save the command for debugging
|
# save the command for debugging
|
||||||
SET(_COMMANDS "${_COMMAND}\n${_COMPILER} --cuda-gpu-arch=sm_35 ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu")
|
set(_COMMANDS
|
||||||
|
"${_COMMAND}\n${_COMPILER} --cuda-gpu-arch=sm_35 ${ARGN} -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu"
|
||||||
|
)
|
||||||
# try the compile test again with clang arguments
|
# try the compile test again with clang arguments
|
||||||
EXECUTE_PROCESS(COMMAND ${_COMPILER} --cuda-gpu-arch=sm_35 -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu
|
execute_process(
|
||||||
|
COMMAND ${_COMPILER} --cuda-gpu-arch=sm_35 -c ${PROJECT_BINARY_DIR}/compile_tests/compiles_cuda.cu
|
||||||
RESULT_VARIABLE _RET
|
RESULT_VARIABLE _RET
|
||||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/compile_tests
|
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/compile_tests
|
||||||
TIMEOUT 15
|
TIMEOUT 15
|
||||||
OUTPUT_QUIET
|
OUTPUT_QUIET ERROR_QUIET
|
||||||
ERROR_QUIET)
|
)
|
||||||
ENDIF()
|
endif()
|
||||||
|
|
||||||
SET(${_VAR}_COMMANDS "${_COMMANDS}" PARENT_SCOPE)
|
set(${_VAR}_COMMANDS "${_COMMANDS}" PARENT_SCOPE)
|
||||||
SET(${_VAR} ${_RET} PARENT_SCOPE)
|
set(${_VAR} ${_RET} PARENT_SCOPE)
|
||||||
ENDFUNCTION()
|
endfunction()
|
||||||
|
|
||||||
# this function is provided to easily select which files use the same compiler as Kokkos
|
# this function is provided to easily select which files use the same compiler as Kokkos
|
||||||
# when it was installed (or nvcc_wrapper):
|
# when it was installed (or nvcc_wrapper):
|
||||||
@ -215,94 +218,107 @@ ENDFUNCTION()
|
|||||||
#
|
#
|
||||||
# Use CHECK_CUDA_COMPILES to run a check when CUDA is enabled
|
# Use CHECK_CUDA_COMPILES to run a check when CUDA is enabled
|
||||||
#
|
#
|
||||||
FUNCTION(kokkos_compilation)
|
function(kokkos_compilation)
|
||||||
CMAKE_PARSE_ARGUMENTS(COMP
|
cmake_parse_arguments(
|
||||||
"GLOBAL;PROJECT;CHECK_CUDA_COMPILES"
|
COMP "GLOBAL;PROJECT;CHECK_CUDA_COMPILES" "COMPILER" "DIRECTORY;TARGET;SOURCE;COMMAND_PREFIX" ${ARGN}
|
||||||
"COMPILER"
|
)
|
||||||
"DIRECTORY;TARGET;SOURCE;COMMAND_PREFIX"
|
|
||||||
${ARGN})
|
|
||||||
|
|
||||||
# if built w/o CUDA support, we want to basically make this a no-op
|
# if built w/o CUDA support, we want to basically make this a no-op
|
||||||
SET(_Kokkos_ENABLE_CUDA @Kokkos_ENABLE_CUDA@)
|
set(_Kokkos_ENABLE_CUDA @Kokkos_ENABLE_CUDA@)
|
||||||
|
|
||||||
|
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17)
|
||||||
IF(CMAKE_VERSION VERSION_GREATER_EQUAL 3.17)
|
set(MAYBE_CURRENT_INSTALLATION_ROOT "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../..")
|
||||||
SET(MAYBE_CURRENT_INSTALLATION_ROOT "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../../..")
|
endif()
|
||||||
ENDIF()
|
|
||||||
|
|
||||||
# search relative first and then absolute
|
# search relative first and then absolute
|
||||||
SET(_HINTS "${MAYBE_CURRENT_INSTALLATION_ROOT}" "@CMAKE_INSTALL_PREFIX@")
|
set(_HINTS "${MAYBE_CURRENT_INSTALLATION_ROOT}" "@CMAKE_INSTALL_PREFIX@")
|
||||||
|
|
||||||
# find kokkos_launch_compiler
|
# find kokkos_launch_compiler
|
||||||
FIND_PROGRAM(Kokkos_COMPILE_LAUNCHER
|
find_program(
|
||||||
|
Kokkos_COMPILE_LAUNCHER
|
||||||
NAMES kokkos_launch_compiler
|
NAMES kokkos_launch_compiler
|
||||||
HINTS ${_HINTS}
|
HINTS ${_HINTS}
|
||||||
PATHS ${_HINTS}
|
PATHS ${_HINTS}
|
||||||
PATH_SUFFIXES bin)
|
PATH_SUFFIXES bin
|
||||||
|
)
|
||||||
|
|
||||||
IF(NOT Kokkos_COMPILE_LAUNCHER)
|
if(NOT Kokkos_COMPILE_LAUNCHER)
|
||||||
MESSAGE(FATAL_ERROR "Kokkos could not find 'kokkos_launch_compiler'. Please set '-DKokkos_COMPILE_LAUNCHER=/path/to/launcher'")
|
message(
|
||||||
ENDIF()
|
FATAL_ERROR
|
||||||
|
"Kokkos could not find 'kokkos_launch_compiler'. Please set '-DKokkos_COMPILE_LAUNCHER=/path/to/launcher'"
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
# if COMPILER was not specified, assume Kokkos_CXX_COMPILER
|
# if COMPILER was not specified, assume Kokkos_CXX_COMPILER
|
||||||
IF(NOT COMP_COMPILER)
|
if(NOT COMP_COMPILER)
|
||||||
SET(COMP_COMPILER ${Kokkos_CXX_COMPILER})
|
set(COMP_COMPILER ${Kokkos_CXX_COMPILER})
|
||||||
IF(_Kokkos_ENABLE_CUDA AND Kokkos_CXX_COMPILER_ID STREQUAL NVIDIA)
|
if(_Kokkos_ENABLE_CUDA AND Kokkos_CXX_COMPILER_ID STREQUAL NVIDIA)
|
||||||
# find nvcc_wrapper
|
# find nvcc_wrapper
|
||||||
FIND_PROGRAM(Kokkos_NVCC_WRAPPER
|
find_program(
|
||||||
|
Kokkos_NVCC_WRAPPER
|
||||||
NAMES nvcc_wrapper
|
NAMES nvcc_wrapper
|
||||||
HINTS ${_HINTS}
|
HINTS ${_HINTS}
|
||||||
PATHS ${_HINTS}
|
PATHS ${_HINTS}
|
||||||
PATH_SUFFIXES bin)
|
PATH_SUFFIXES bin
|
||||||
|
)
|
||||||
# fatal if we can't nvcc_wrapper
|
# fatal if we can't nvcc_wrapper
|
||||||
IF(NOT Kokkos_NVCC_WRAPPER)
|
if(NOT Kokkos_NVCC_WRAPPER)
|
||||||
MESSAGE(FATAL_ERROR "Kokkos could not find nvcc_wrapper. Please set '-DKokkos_NVCC_WRAPPER=/path/to/nvcc_wrapper'")
|
message(
|
||||||
ENDIF()
|
FATAL_ERROR "Kokkos could not find nvcc_wrapper. Please set '-DKokkos_NVCC_WRAPPER=/path/to/nvcc_wrapper'"
|
||||||
SET(COMP_COMPILER ${Kokkos_NVCC_WRAPPER})
|
)
|
||||||
ENDIF()
|
endif()
|
||||||
ENDIF()
|
set(COMP_COMPILER ${Kokkos_NVCC_WRAPPER})
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
# check that the original compiler still exists!
|
# check that the original compiler still exists!
|
||||||
IF(NOT EXISTS ${COMP_COMPILER})
|
if(NOT EXISTS ${COMP_COMPILER})
|
||||||
MESSAGE(FATAL_ERROR "Kokkos could not find original compiler: '${COMP_COMPILER}'")
|
message(FATAL_ERROR "Kokkos could not find original compiler: '${COMP_COMPILER}'")
|
||||||
ENDIF()
|
endif()
|
||||||
|
|
||||||
# try to ensure that compiling cuda code works!
|
# try to ensure that compiling cuda code works!
|
||||||
IF(_Kokkos_ENABLE_CUDA AND COMP_CHECK_CUDA_COMPILES)
|
if(_Kokkos_ENABLE_CUDA AND COMP_CHECK_CUDA_COMPILES)
|
||||||
|
|
||||||
# this may fail if kokkos_compiler launcher was used during install
|
# this may fail if kokkos_compiler launcher was used during install
|
||||||
kokkos_cxx_compiler_cuda_test(_COMPILES_CUDA
|
kokkos_cxx_compiler_cuda_test(_COMPILES_CUDA ${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER})
|
||||||
${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER})
|
|
||||||
|
|
||||||
# if above failed, throw an error
|
# if above failed, throw an error
|
||||||
IF(NOT _COMPILES_CUDA)
|
if(NOT _COMPILES_CUDA)
|
||||||
MESSAGE(FATAL_ERROR "kokkos_cxx_compiler_cuda_test failed! Test commands:\n${_COMPILES_CUDA_COMMANDS}")
|
message(FATAL_ERROR "kokkos_cxx_compiler_cuda_test failed! Test commands:\n${_COMPILES_CUDA_COMMANDS}")
|
||||||
ENDIF()
|
endif()
|
||||||
ENDIF()
|
endif()
|
||||||
|
|
||||||
IF(COMP_COMMAND_PREFIX)
|
if(COMP_COMMAND_PREFIX)
|
||||||
SET(_PREFIX "${COMP_COMMAND_PREFIX}")
|
set(_PREFIX "${COMP_COMMAND_PREFIX}")
|
||||||
STRING(REPLACE ";" " " _PREFIX "${COMP_COMMAND_PREFIX}")
|
string(REPLACE ";" " " _PREFIX "${COMP_COMMAND_PREFIX}")
|
||||||
SET(Kokkos_COMPILER_LAUNCHER "${_PREFIX} ${Kokkos_COMPILE_LAUNCHER}")
|
set(Kokkos_COMPILER_LAUNCHER "${_PREFIX} ${Kokkos_COMPILE_LAUNCHER}")
|
||||||
ENDIF()
|
endif()
|
||||||
|
|
||||||
IF(COMP_GLOBAL)
|
if(COMP_GLOBAL)
|
||||||
# if global, don't bother setting others
|
# if global, don't bother setting others
|
||||||
SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}")
|
set_property(
|
||||||
SET_PROPERTY(GLOBAL PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}")
|
GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}"
|
||||||
ELSE()
|
)
|
||||||
FOREACH(_TYPE PROJECT DIRECTORY TARGET SOURCE)
|
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}")
|
||||||
|
else()
|
||||||
|
foreach(_TYPE PROJECT DIRECTORY TARGET SOURCE)
|
||||||
# make project/subproject scoping easy, e.g. KokkosCompilation(PROJECT) after project(...)
|
# make project/subproject scoping easy, e.g. KokkosCompilation(PROJECT) after project(...)
|
||||||
IF("${_TYPE}" STREQUAL "PROJECT" AND COMP_${_TYPE})
|
if("${_TYPE}" STREQUAL "PROJECT" AND COMP_${_TYPE})
|
||||||
LIST(APPEND COMP_DIRECTORY ${PROJECT_SOURCE_DIR})
|
list(APPEND COMP_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||||
UNSET(COMP_${_TYPE})
|
unset(COMP_${_TYPE})
|
||||||
ENDIF()
|
endif()
|
||||||
# set the properties if defined
|
# set the properties if defined
|
||||||
IF(COMP_${_TYPE})
|
if(COMP_${_TYPE})
|
||||||
# MESSAGE(STATUS "Using ${COMP_COMPILER} :: ${_TYPE} :: ${COMP_${_TYPE}}")
|
# MESSAGE(STATUS "Using ${COMP_COMPILER} :: ${_TYPE} :: ${COMP_${_TYPE}}")
|
||||||
SET_PROPERTY(${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_COMPILE "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}")
|
set_property(
|
||||||
SET_PROPERTY(${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_LINK "${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}")
|
${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_COMPILE
|
||||||
ENDIF()
|
"${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}"
|
||||||
ENDFOREACH()
|
)
|
||||||
ENDIF()
|
set_property(
|
||||||
ENDFUNCTION()
|
${_TYPE} ${COMP_${_TYPE}} PROPERTY RULE_LAUNCH_LINK
|
||||||
|
"${Kokkos_COMPILE_LAUNCHER} ${COMP_COMPILER} ${CMAKE_CXX_COMPILER}"
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
endif()
|
||||||
|
endfunction()
|
||||||
|
|||||||
@ -9,7 +9,9 @@
|
|||||||
// KOKKOS_VERSION % 100 is the patch level
|
// KOKKOS_VERSION % 100 is the patch level
|
||||||
// KOKKOS_VERSION / 100 % 100 is the minor version
|
// KOKKOS_VERSION / 100 % 100 is the minor version
|
||||||
// KOKKOS_VERSION / 10000 is the major version
|
// KOKKOS_VERSION / 10000 is the major version
|
||||||
#define KOKKOS_VERSION @KOKKOS_VERSION@
|
#cmakedefine KOKKOS_VERSION @KOKKOS_VERSION@
|
||||||
|
// Not using #cmakedefine below because a "0" FOO version number
|
||||||
|
// yields /* undef KOKKOS_VERSION_FOO */
|
||||||
#define KOKKOS_VERSION_MAJOR @KOKKOS_VERSION_MAJOR@
|
#define KOKKOS_VERSION_MAJOR @KOKKOS_VERSION_MAJOR@
|
||||||
#define KOKKOS_VERSION_MINOR @KOKKOS_VERSION_MINOR@
|
#define KOKKOS_VERSION_MINOR @KOKKOS_VERSION_MINOR@
|
||||||
#define KOKKOS_VERSION_PATCH @KOKKOS_VERSION_PATCH@
|
#define KOKKOS_VERSION_PATCH @KOKKOS_VERSION_PATCH@
|
||||||
@ -116,6 +118,7 @@
|
|||||||
#cmakedefine KOKKOS_ARCH_AMD_ZEN
|
#cmakedefine KOKKOS_ARCH_AMD_ZEN
|
||||||
#cmakedefine KOKKOS_ARCH_AMD_ZEN2
|
#cmakedefine KOKKOS_ARCH_AMD_ZEN2
|
||||||
#cmakedefine KOKKOS_ARCH_AMD_ZEN3
|
#cmakedefine KOKKOS_ARCH_AMD_ZEN3
|
||||||
|
#cmakedefine KOKKOS_ARCH_AMD_ZEN4
|
||||||
#cmakedefine KOKKOS_ARCH_AMD_GFX906
|
#cmakedefine KOKKOS_ARCH_AMD_GFX906
|
||||||
#cmakedefine KOKKOS_ARCH_AMD_GFX908
|
#cmakedefine KOKKOS_ARCH_AMD_GFX908
|
||||||
#cmakedefine KOKKOS_ARCH_AMD_GFX90A
|
#cmakedefine KOKKOS_ARCH_AMD_GFX90A
|
||||||
|
|||||||
@ -11,9 +11,16 @@ if(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC AND CMAKE_VERSION VERSION_LESS "3.
|
|||||||
message(FATAL_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1")
|
message(FATAL_ERROR "Using NVHPC as host compiler requires at least CMake 3.20.1")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
set(TPL_CUDA_LIBRARIES "")
|
||||||
|
if(KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE)
|
||||||
|
set(TPL_CUDA_LIBRARIES CUDA::cuda_driver)
|
||||||
|
else()
|
||||||
|
set(TPL_CUDA_LIBRARIES CUDA::cuda_driver CUDA::cudart)
|
||||||
|
endif()
|
||||||
|
|
||||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0")
|
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17.0")
|
||||||
find_package(CUDAToolkit REQUIRED)
|
find_package(CUDAToolkit REQUIRED)
|
||||||
kokkos_create_imported_tpl(CUDA INTERFACE LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart)
|
kokkos_create_imported_tpl(CUDA INTERFACE LINK_LIBRARIES ${TPL_CUDA_LIBRARIES})
|
||||||
kokkos_export_cmake_tpl(CUDAToolkit REQUIRED)
|
kokkos_export_cmake_tpl(CUDAToolkit REQUIRED)
|
||||||
else()
|
else()
|
||||||
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
|
include(${CMAKE_CURRENT_LIST_DIR}/CudaToolkit.cmake)
|
||||||
@ -33,8 +40,8 @@ else()
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
include(FindPackageHandleStandardArgs)
|
include(FindPackageHandleStandardArgs)
|
||||||
find_package_handle_standard_args(TPLCUDA ${DEFAULT_MSG} FOUND_CUDART FOUND_CUDA_DRIVER)
|
find_package_handle_standard_args(TPLCUDA ${DEFAULT_MSG} FOUND_CUDA_DRIVER FOUND_CUDART)
|
||||||
if(FOUND_CUDA_DRIVER AND FOUND_CUDART)
|
if(FOUND_CUDA_DRIVER AND FOUND_CUDART)
|
||||||
kokkos_create_imported_tpl(CUDA INTERFACE LINK_LIBRARIES CUDA::cuda_driver CUDA::cudart)
|
kokkos_create_imported_tpl(CUDA INTERFACE LINK_LIBRARIES ${TPL_CUDA_LIBRARIES})
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@ -1,15 +0,0 @@
|
|||||||
function(kokkos_set_intel_flags full_standard int_standard)
|
|
||||||
string(TOLOWER ${full_standard} FULL_LC_STANDARD)
|
|
||||||
string(TOLOWER ${int_standard} INT_LC_STANDARD)
|
|
||||||
# The following three blocks of code were copied from
|
|
||||||
# /Modules/Compiler/Intel-CXX.cmake from CMake 3.18.1 and then modified.
|
|
||||||
if(CMAKE_CXX_SIMULATE_ID STREQUAL MSVC)
|
|
||||||
set(_std -Qstd)
|
|
||||||
set(_ext c++)
|
|
||||||
else()
|
|
||||||
set(_std -std)
|
|
||||||
set(_ext gnu++)
|
|
||||||
endif()
|
|
||||||
set(KOKKOS_CXX_STANDARD_FLAG "${_std}=c++${FULL_LC_STANDARD}" PARENT_SCOPE)
|
|
||||||
set(KOKKOS_CXX_INTERMDIATE_STANDARD_FLAG "${_std}=${_ext}${INT_LC_STANDARD}" PARENT_SCOPE)
|
|
||||||
endfunction()
|
|
||||||
@ -67,6 +67,7 @@ declare_and_check_host_arch(POWER9 "IBM POWER9 CPUs")
|
|||||||
declare_and_check_host_arch(ZEN "AMD Zen architecture")
|
declare_and_check_host_arch(ZEN "AMD Zen architecture")
|
||||||
declare_and_check_host_arch(ZEN2 "AMD Zen2 architecture")
|
declare_and_check_host_arch(ZEN2 "AMD Zen2 architecture")
|
||||||
declare_and_check_host_arch(ZEN3 "AMD Zen3 architecture")
|
declare_and_check_host_arch(ZEN3 "AMD Zen3 architecture")
|
||||||
|
declare_and_check_host_arch(ZEN4 "AMD Zen4 architecture")
|
||||||
declare_and_check_host_arch(RISCV_SG2042 "SG2042 (RISC-V) CPUs")
|
declare_and_check_host_arch(RISCV_SG2042 "SG2042 (RISC-V) CPUs")
|
||||||
declare_and_check_host_arch(RISCV_RVA22V "RVA22V (RISC-V) CPUs")
|
declare_and_check_host_arch(RISCV_RVA22V "RVA22V (RISC-V) CPUs")
|
||||||
|
|
||||||
@ -163,16 +164,11 @@ if(KOKKOS_ENABLE_COMPILER_WARNINGS)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# ICPC doesn't support -Wsuggest-override
|
|
||||||
if(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
|
|
||||||
list(REMOVE_ITEM COMMON_WARNINGS "-Wsuggest-override")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
|
if(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
|
||||||
list(APPEND COMMON_WARNINGS "-Wimplicit-fallthrough")
|
list(APPEND COMMON_WARNINGS "-Wimplicit-fallthrough")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(GNU_WARNINGS "-Wempty-body" "-Wclobbered" "-Wignored-qualifiers" ${COMMON_WARNINGS})
|
set(GNU_WARNINGS "-Wempty-body" "-Wignored-qualifiers" ${COMMON_WARNINGS})
|
||||||
if(KOKKOS_CXX_COMPILER_ID STREQUAL GNU)
|
if(KOKKOS_CXX_COMPILER_ID STREQUAL GNU)
|
||||||
list(APPEND GNU_WARNINGS "-Wimplicit-fallthrough")
|
list(APPEND GNU_WARNINGS "-Wimplicit-fallthrough")
|
||||||
endif()
|
endif()
|
||||||
@ -349,12 +345,27 @@ endif()
|
|||||||
|
|
||||||
if(KOKKOS_ARCH_ARMV9_GRACE)
|
if(KOKKOS_ARCH_ARMV9_GRACE)
|
||||||
set(KOKKOS_ARCH_ARM_NEON ON)
|
set(KOKKOS_ARCH_ARM_NEON ON)
|
||||||
|
if(KOKKOS_CXX_HOST_COMPILER_ID STREQUAL NVHPC)
|
||||||
|
check_cxx_compiler_flag("-tp=grace" COMPILER_SUPPORTS_GRACE_AS_TARGET_PROCESSOR)
|
||||||
|
else()
|
||||||
check_cxx_compiler_flag("-mcpu=neoverse-n2" COMPILER_SUPPORTS_NEOVERSE_N2)
|
check_cxx_compiler_flag("-mcpu=neoverse-n2" COMPILER_SUPPORTS_NEOVERSE_N2)
|
||||||
check_cxx_compiler_flag("-msve-vector-bits=128" COMPILER_SUPPORTS_SVE_VECTOR_BITS)
|
check_cxx_compiler_flag("-msve-vector-bits=128" COMPILER_SUPPORTS_SVE_VECTOR_BITS)
|
||||||
if(COMPILER_SUPPORTS_NEOVERSE_N2 AND COMPILER_SUPPORTS_SVE_VECTOR_BITS)
|
endif()
|
||||||
compiler_specific_flags(COMPILER_ID KOKKOS_CXX_HOST_COMPILER_ID DEFAULT -mcpu=neoverse-n2 -msve-vector-bits=128)
|
if(COMPILER_SUPPORTS_NEOVERSE_N2 AND COMPILER_SUPPORTS_SVE_VECTOR_BITS OR COMPILER_SUPPORTS_GRACE_AS_TARGET_PROCESSOR)
|
||||||
|
compiler_specific_flags(
|
||||||
|
COMPILER_ID
|
||||||
|
KOKKOS_CXX_HOST_COMPILER_ID
|
||||||
|
NVHPC
|
||||||
|
-tp=grace
|
||||||
|
DEFAULT
|
||||||
|
-mcpu=neoverse-n2
|
||||||
|
-msve-vector-bits=128
|
||||||
|
)
|
||||||
else()
|
else()
|
||||||
message(WARNING "Compiler does not support ARMv9 Grace architecture")
|
message(SEND_ERROR "Your compiler does not appear to support the ARMv9 Grace architecture.
|
||||||
|
Please ensure you are using a compatible compiler and toolchain.
|
||||||
|
Alternatively, try configuring with -DKokkos_ARCH_NATIVE=ON to use the native architecture of your system."
|
||||||
|
)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
@ -362,8 +373,6 @@ if(KOKKOS_ARCH_ZEN)
|
|||||||
compiler_specific_flags(
|
compiler_specific_flags(
|
||||||
COMPILER_ID
|
COMPILER_ID
|
||||||
KOKKOS_CXX_HOST_COMPILER_ID
|
KOKKOS_CXX_HOST_COMPILER_ID
|
||||||
Intel
|
|
||||||
-mavx2
|
|
||||||
MSVC
|
MSVC
|
||||||
/arch:AVX2
|
/arch:AVX2
|
||||||
NVHPC
|
NVHPC
|
||||||
@ -380,8 +389,6 @@ if(KOKKOS_ARCH_ZEN2)
|
|||||||
compiler_specific_flags(
|
compiler_specific_flags(
|
||||||
COMPILER_ID
|
COMPILER_ID
|
||||||
KOKKOS_CXX_HOST_COMPILER_ID
|
KOKKOS_CXX_HOST_COMPILER_ID
|
||||||
Intel
|
|
||||||
-mavx2
|
|
||||||
MSVC
|
MSVC
|
||||||
/arch:AVX2
|
/arch:AVX2
|
||||||
NVHPC
|
NVHPC
|
||||||
@ -398,12 +405,10 @@ if(KOKKOS_ARCH_ZEN3)
|
|||||||
compiler_specific_flags(
|
compiler_specific_flags(
|
||||||
COMPILER_ID
|
COMPILER_ID
|
||||||
KOKKOS_CXX_HOST_COMPILER_ID
|
KOKKOS_CXX_HOST_COMPILER_ID
|
||||||
Intel
|
|
||||||
-mavx2
|
|
||||||
MSVC
|
MSVC
|
||||||
/arch:AVX2
|
/arch:AVX2
|
||||||
NVHPC
|
NVHPC
|
||||||
-tp=zen2
|
-tp=zen3
|
||||||
DEFAULT
|
DEFAULT
|
||||||
-march=znver3
|
-march=znver3
|
||||||
-mtune=znver3
|
-mtune=znver3
|
||||||
@ -412,6 +417,22 @@ if(KOKKOS_ARCH_ZEN3)
|
|||||||
set(KOKKOS_ARCH_AVX2 ON)
|
set(KOKKOS_ARCH_AVX2 ON)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(KOKKOS_ARCH_ZEN4)
|
||||||
|
compiler_specific_flags(
|
||||||
|
COMPILER_ID
|
||||||
|
KOKKOS_CXX_HOST_COMPILER_ID
|
||||||
|
MSVC
|
||||||
|
/arch:AVX512
|
||||||
|
NVHPC
|
||||||
|
-tp=zen4
|
||||||
|
DEFAULT
|
||||||
|
-march=znver4
|
||||||
|
-mtune=znver4
|
||||||
|
)
|
||||||
|
set(KOKKOS_ARCH_AMD_ZEN4 ON)
|
||||||
|
set(KOKKOS_ARCH_AVX512XEON ON)
|
||||||
|
endif()
|
||||||
|
|
||||||
if(KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX)
|
if(KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX)
|
||||||
set(KOKKOS_ARCH_AVX ON)
|
set(KOKKOS_ARCH_AVX ON)
|
||||||
compiler_specific_flags(
|
compiler_specific_flags(
|
||||||
@ -419,8 +440,6 @@ if(KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX)
|
|||||||
KOKKOS_CXX_HOST_COMPILER_ID
|
KOKKOS_CXX_HOST_COMPILER_ID
|
||||||
Cray
|
Cray
|
||||||
NO-VALUE-SPECIFIED
|
NO-VALUE-SPECIFIED
|
||||||
Intel
|
|
||||||
-mavx
|
|
||||||
MSVC
|
MSVC
|
||||||
/arch:AVX
|
/arch:AVX
|
||||||
NVHPC
|
NVHPC
|
||||||
@ -437,8 +456,6 @@ if(KOKKOS_ARCH_HSW)
|
|||||||
KOKKOS_CXX_HOST_COMPILER_ID
|
KOKKOS_CXX_HOST_COMPILER_ID
|
||||||
Cray
|
Cray
|
||||||
NO-VALUE-SPECIFIED
|
NO-VALUE-SPECIFIED
|
||||||
Intel
|
|
||||||
-xCORE-AVX2
|
|
||||||
MSVC
|
MSVC
|
||||||
/arch:AVX2
|
/arch:AVX2
|
||||||
NVHPC
|
NVHPC
|
||||||
@ -477,8 +494,6 @@ if(KOKKOS_ARCH_BDW)
|
|||||||
KOKKOS_CXX_HOST_COMPILER_ID
|
KOKKOS_CXX_HOST_COMPILER_ID
|
||||||
Cray
|
Cray
|
||||||
NO-VALUE-SPECIFIED
|
NO-VALUE-SPECIFIED
|
||||||
Intel
|
|
||||||
-xCORE-AVX2
|
|
||||||
MSVC
|
MSVC
|
||||||
/arch:AVX2
|
/arch:AVX2
|
||||||
NVHPC
|
NVHPC
|
||||||
@ -498,8 +513,6 @@ if(KOKKOS_ARCH_KNL)
|
|||||||
KOKKOS_CXX_HOST_COMPILER_ID
|
KOKKOS_CXX_HOST_COMPILER_ID
|
||||||
Cray
|
Cray
|
||||||
NO-VALUE-SPECIFIED
|
NO-VALUE-SPECIFIED
|
||||||
Intel
|
|
||||||
-xMIC-AVX512
|
|
||||||
MSVC
|
MSVC
|
||||||
/arch:AVX512
|
/arch:AVX512
|
||||||
NVHPC
|
NVHPC
|
||||||
@ -520,8 +533,6 @@ if(KOKKOS_ARCH_SKL)
|
|||||||
KOKKOS_CXX_HOST_COMPILER_ID
|
KOKKOS_CXX_HOST_COMPILER_ID
|
||||||
Cray
|
Cray
|
||||||
NO-VALUE-SPECIFIED
|
NO-VALUE-SPECIFIED
|
||||||
Intel
|
|
||||||
-xSKYLAKE
|
|
||||||
MSVC
|
MSVC
|
||||||
/arch:AVX2
|
/arch:AVX2
|
||||||
NVHPC
|
NVHPC
|
||||||
@ -539,8 +550,6 @@ if(KOKKOS_ARCH_SKX)
|
|||||||
KOKKOS_CXX_HOST_COMPILER_ID
|
KOKKOS_CXX_HOST_COMPILER_ID
|
||||||
Cray
|
Cray
|
||||||
NO-VALUE-SPECIFIED
|
NO-VALUE-SPECIFIED
|
||||||
Intel
|
|
||||||
-xCORE-AVX512
|
|
||||||
MSVC
|
MSVC
|
||||||
/arch:AVX512
|
/arch:AVX512
|
||||||
NVHPC
|
NVHPC
|
||||||
@ -1193,9 +1202,8 @@ if(KOKKOS_ENABLE_HIP AND NOT AMDGPU_ARCH_ALREADY_SPECIFIED AND NOT KOKKOS_IMPL_A
|
|||||||
)
|
)
|
||||||
else()
|
else()
|
||||||
execute_process(COMMAND ${ROCM_ENUMERATOR} OUTPUT_VARIABLE GPU_ARCHS)
|
execute_process(COMMAND ${ROCM_ENUMERATOR} OUTPUT_VARIABLE GPU_ARCHS)
|
||||||
string(LENGTH "${GPU_ARCHS}" len_str)
|
# Exits early if no GPU was detected
|
||||||
# enumerator always output gfx000 as the first line
|
if("${GPU_ARCHS}" STREQUAL "")
|
||||||
if(${len_str} LESS 8)
|
|
||||||
message(SEND_ERROR "HIP enabled but no AMD GPU architecture could be automatically detected. "
|
message(SEND_ERROR "HIP enabled but no AMD GPU architecture could be automatically detected. "
|
||||||
"Please manually specify one AMD GPU architecture via -DKokkos_ARCH_{..}=ON'."
|
"Please manually specify one AMD GPU architecture via -DKokkos_ARCH_{..}=ON'."
|
||||||
)
|
)
|
||||||
|
|||||||
@ -163,7 +163,6 @@ if(CMAKE_CXX_STANDARD EQUAL 17)
|
|||||||
set(KOKKOS_CLANG_CUDA_MINIMUM 10.0.0)
|
set(KOKKOS_CLANG_CUDA_MINIMUM 10.0.0)
|
||||||
set(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
|
set(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
|
||||||
set(KOKKOS_GCC_MINIMUM 8.2.0)
|
set(KOKKOS_GCC_MINIMUM 8.2.0)
|
||||||
set(KOKKOS_INTEL_MINIMUM 19.0.5)
|
|
||||||
set(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2021.1.1)
|
set(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2021.1.1)
|
||||||
set(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0)
|
set(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0)
|
||||||
set(KOKKOS_NVCC_MINIMUM 11.0.0)
|
set(KOKKOS_NVCC_MINIMUM 11.0.0)
|
||||||
@ -175,7 +174,6 @@ else()
|
|||||||
set(KOKKOS_CLANG_CUDA_MINIMUM 14.0.0)
|
set(KOKKOS_CLANG_CUDA_MINIMUM 14.0.0)
|
||||||
set(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
|
set(KOKKOS_CLANG_OPENMPTARGET_MINIMUM 15.0.0)
|
||||||
set(KOKKOS_GCC_MINIMUM 10.1.0)
|
set(KOKKOS_GCC_MINIMUM 10.1.0)
|
||||||
set(KOKKOS_INTEL_MINIMUM "not supported")
|
|
||||||
set(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2022.0.0)
|
set(KOKKOS_INTEL_LLVM_CPU_MINIMUM 2022.0.0)
|
||||||
set(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0)
|
set(KOKKOS_INTEL_LLVM_SYCL_MINIMUM 2023.0.0)
|
||||||
set(KOKKOS_NVCC_MINIMUM 12.0.0)
|
set(KOKKOS_NVCC_MINIMUM 12.0.0)
|
||||||
@ -191,7 +189,7 @@ set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CPU) ${KOKKO
|
|||||||
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) ${KOKKOS_CLANG_CUDA_MINIMUM}")
|
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(CUDA) ${KOKKOS_CLANG_CUDA_MINIMUM}")
|
||||||
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) ${KOKKOS_CLANG_OPENMPTARGET_MINIMUM}")
|
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang(OpenMPTarget) ${KOKKOS_CLANG_OPENMPTARGET_MINIMUM}")
|
||||||
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC ${KOKKOS_GCC_MINIMUM}")
|
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC ${KOKKOS_GCC_MINIMUM}")
|
||||||
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel ${KOKKOS_INTEL_MINIMUM}")
|
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel not supported")
|
||||||
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) ${KOKKOS_INTEL_LLVM_CPU_MINIMUM}")
|
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(CPU) ${KOKKOS_INTEL_LLVM_CPU_MINIMUM}")
|
||||||
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM}")
|
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n IntelLLVM(SYCL) ${KOKKOS_INTEL_LLVM_SYCL_MINIMUM}")
|
||||||
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC ${KOKKOS_NVCC_MINIMUM}")
|
set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC ${KOKKOS_NVCC_MINIMUM}")
|
||||||
@ -214,9 +212,7 @@ elseif(KOKKOS_CXX_COMPILER_ID STREQUAL GNU)
|
|||||||
message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||||
endif()
|
endif()
|
||||||
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
|
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
|
||||||
if((NOT CMAKE_CXX_STANDARD EQUAL 17) OR (KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_MINIMUM}))
|
|
||||||
message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||||
endif()
|
|
||||||
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND NOT Kokkos_ENABLE_SYCL)
|
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL IntelLLVM AND NOT Kokkos_ENABLE_SYCL)
|
||||||
if(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_CPU_MINIMUM})
|
if(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS ${KOKKOS_INTEL_LLVM_CPU_MINIMUM})
|
||||||
message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
|
||||||
|
|||||||
@ -76,7 +76,7 @@ kokkos_enable_option(
|
|||||||
HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF
|
HIP_MULTIPLE_KERNEL_INSTANTIATIONS OFF
|
||||||
"Whether multiple kernels are instantiated at compile time - improve performance but increase compile time"
|
"Whether multiple kernels are instantiated at compile time - improve performance but increase compile time"
|
||||||
)
|
)
|
||||||
kokkos_enable_option(IMPL_HIP_MALLOC_ASYNC OFF "Whether to enable hipMallocAsync")
|
kokkos_enable_option(IMPL_HIP_MALLOC_ASYNC ${KOKKOS_ENABLE_HIP} "Whether to enable hipMallocAsync")
|
||||||
kokkos_enable_option(OPENACC_FORCE_HOST_AS_DEVICE OFF "Whether to force to use host as a target device for OpenACC")
|
kokkos_enable_option(OPENACC_FORCE_HOST_AS_DEVICE OFF "Whether to force to use host as a target device for OpenACC")
|
||||||
|
|
||||||
# This option will go away eventually, but allows fallback to old implementation when needed.
|
# This option will go away eventually, but allows fallback to old implementation when needed.
|
||||||
|
|||||||
@ -799,7 +799,6 @@ function(COMPILER_SPECIFIC_OPTIONS_HELPER)
|
|||||||
NVHPC
|
NVHPC
|
||||||
DEFAULT
|
DEFAULT
|
||||||
Cray
|
Cray
|
||||||
Intel
|
|
||||||
Clang
|
Clang
|
||||||
AppleClang
|
AppleClang
|
||||||
IntelLLVM
|
IntelLLVM
|
||||||
|
|||||||
@ -155,9 +155,6 @@ if(NOT KOKKOS_CXX_STANDARD_FEATURE)
|
|||||||
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
|
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
|
||||||
include(${KOKKOS_SRC_PATH}/cmake/pgi.cmake)
|
include(${KOKKOS_SRC_PATH}/cmake/pgi.cmake)
|
||||||
kokkos_set_pgi_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
|
kokkos_set_pgi_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
|
||||||
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
|
|
||||||
include(${KOKKOS_SRC_PATH}/cmake/intel.cmake)
|
|
||||||
kokkos_set_intel_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
|
|
||||||
elseif((KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") OR ((KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA") AND WIN32))
|
elseif((KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") OR ((KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA") AND WIN32))
|
||||||
include(${KOKKOS_SRC_PATH}/cmake/msvc.cmake)
|
include(${KOKKOS_SRC_PATH}/cmake/msvc.cmake)
|
||||||
kokkos_set_msvc_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
|
kokkos_set_msvc_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD})
|
||||||
|
|||||||
@ -106,7 +106,6 @@ function(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME)
|
|||||||
OR Kokkos_ENABLE_SYCL
|
OR Kokkos_ENABLE_SYCL
|
||||||
OR Kokkos_ENABLE_HPX
|
OR Kokkos_ENABLE_HPX
|
||||||
OR Kokkos_ENABLE_IMPL_SKIP_NO_RTTI_FLAG
|
OR Kokkos_ENABLE_IMPL_SKIP_NO_RTTI_FLAG
|
||||||
OR (KOKKOS_CXX_COMPILER_ID STREQUAL "Intel" AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 2021.2.0)
|
|
||||||
OR (KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA" AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11.3.0)
|
OR (KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA" AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 11.3.0)
|
||||||
OR (KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA" AND KOKKOS_CXX_HOST_COMPILER_ID STREQUAL "MSVC"))
|
OR (KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA" AND KOKKOS_CXX_HOST_COMPILER_ID STREQUAL "MSVC"))
|
||||||
)
|
)
|
||||||
|
|||||||
@ -18,6 +18,8 @@ LINK ?= $(CXX)
|
|||||||
LDFLAGS ?=
|
LDFLAGS ?=
|
||||||
override LDFLAGS += -lpthread
|
override LDFLAGS += -lpthread
|
||||||
|
|
||||||
|
KOKKOS_USE_DEPRECATED_MAKEFILES=1
|
||||||
|
|
||||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||||
|
|
||||||
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests
|
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests
|
||||||
|
|||||||
@ -22,6 +22,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <Kokkos_Core.hpp>
|
#include <Kokkos_Core.hpp>
|
||||||
|
#include <Kokkos_BitManipulation.hpp>
|
||||||
#include <Kokkos_Functional.hpp>
|
#include <Kokkos_Functional.hpp>
|
||||||
|
|
||||||
#include <impl/Kokkos_Bitset_impl.hpp>
|
#include <impl/Kokkos_Bitset_impl.hpp>
|
||||||
@ -62,13 +63,11 @@ class Bitset {
|
|||||||
BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD;
|
BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum : unsigned {
|
static constexpr unsigned block_size = sizeof(unsigned) * CHAR_BIT;
|
||||||
block_size = static_cast<unsigned>(sizeof(unsigned) * CHAR_BIT)
|
static constexpr unsigned block_mask = block_size - 1u;
|
||||||
};
|
static constexpr unsigned block_shift =
|
||||||
enum : unsigned { block_mask = block_size - 1u };
|
Kokkos::has_single_bit(block_size) ? Kokkos::bit_width(block_size) - 1
|
||||||
enum : unsigned {
|
: ~0u;
|
||||||
block_shift = Kokkos::Impl::integral_power_of_two(block_size)
|
|
||||||
};
|
|
||||||
|
|
||||||
//! Type of @ref m_blocks.
|
//! Type of @ref m_blocks.
|
||||||
using block_view_type = View<unsigned*, Device, MemoryTraits<RandomAccess>>;
|
using block_view_type = View<unsigned*, Device, MemoryTraits<RandomAccess>>;
|
||||||
@ -135,9 +134,9 @@ class Bitset {
|
|||||||
|
|
||||||
if (m_last_block_mask) {
|
if (m_last_block_mask) {
|
||||||
// clear the unused bits in the last block
|
// clear the unused bits in the last block
|
||||||
Kokkos::Impl::DeepCopy<typename Device::memory_space, Kokkos::HostSpace>(
|
auto last_block = Kokkos::subview(m_blocks, m_blocks.extent(0) - 1u);
|
||||||
m_blocks.data() + (m_blocks.extent(0) - 1u), &m_last_block_mask,
|
Kokkos::deep_copy(typename Device::execution_space{}, last_block,
|
||||||
sizeof(unsigned));
|
m_last_block_mask);
|
||||||
Kokkos::fence(
|
Kokkos::fence(
|
||||||
"Bitset::set: fence after clearing unused bits copying from "
|
"Bitset::set: fence after clearing unused bits copying from "
|
||||||
"HostSpace");
|
"HostSpace");
|
||||||
@ -324,9 +323,11 @@ class ConstBitset {
|
|||||||
using block_view_type = typename Bitset<Device>::block_view_type::const_type;
|
using block_view_type = typename Bitset<Device>::block_view_type::const_type;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum { block_size = static_cast<unsigned>(sizeof(unsigned) * CHAR_BIT) };
|
static constexpr unsigned block_size = sizeof(unsigned) * CHAR_BIT;
|
||||||
enum { block_mask = block_size - 1u };
|
static constexpr unsigned block_mask = block_size - 1u;
|
||||||
enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) };
|
static constexpr unsigned block_shift =
|
||||||
|
Kokkos::has_single_bit(block_size) ? Kokkos::bit_width(block_size) - 1
|
||||||
|
: ~0u;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
KOKKOS_FUNCTION
|
KOKKOS_FUNCTION
|
||||||
@ -400,13 +401,7 @@ void deep_copy(Bitset<DstDevice>& dst, Bitset<SrcDevice> const& src) {
|
|||||||
Kokkos::Impl::throw_runtime_exception(
|
Kokkos::Impl::throw_runtime_exception(
|
||||||
"Error: Cannot deep_copy bitsets of different sizes!");
|
"Error: Cannot deep_copy bitsets of different sizes!");
|
||||||
}
|
}
|
||||||
|
Kokkos::deep_copy(dst.m_blocks, src.m_blocks);
|
||||||
Kokkos::fence("Bitset::deep_copy: fence before copy operation");
|
|
||||||
Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
|
|
||||||
typename SrcDevice::memory_space>(
|
|
||||||
dst.m_blocks.data(), src.m_blocks.data(),
|
|
||||||
sizeof(unsigned) * src.m_blocks.extent(0));
|
|
||||||
Kokkos::fence("Bitset::deep_copy: fence after copy operation");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename DstDevice, typename SrcDevice>
|
template <typename DstDevice, typename SrcDevice>
|
||||||
@ -415,13 +410,7 @@ void deep_copy(Bitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
|
|||||||
Kokkos::Impl::throw_runtime_exception(
|
Kokkos::Impl::throw_runtime_exception(
|
||||||
"Error: Cannot deep_copy bitsets of different sizes!");
|
"Error: Cannot deep_copy bitsets of different sizes!");
|
||||||
}
|
}
|
||||||
|
Kokkos::deep_copy(dst.m_blocks, src.m_blocks);
|
||||||
Kokkos::fence("Bitset::deep_copy: fence before copy operation");
|
|
||||||
Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
|
|
||||||
typename SrcDevice::memory_space>(
|
|
||||||
dst.m_blocks.data(), src.m_blocks.data(),
|
|
||||||
sizeof(unsigned) * src.m_blocks.extent(0));
|
|
||||||
Kokkos::fence("Bitset::deep_copy: fence after copy operation");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename DstDevice, typename SrcDevice>
|
template <typename DstDevice, typename SrcDevice>
|
||||||
@ -430,13 +419,7 @@ void deep_copy(ConstBitset<DstDevice>& dst, ConstBitset<SrcDevice> const& src) {
|
|||||||
Kokkos::Impl::throw_runtime_exception(
|
Kokkos::Impl::throw_runtime_exception(
|
||||||
"Error: Cannot deep_copy bitsets of different sizes!");
|
"Error: Cannot deep_copy bitsets of different sizes!");
|
||||||
}
|
}
|
||||||
|
Kokkos::deep_copy(dst.m_blocks, src.m_blocks);
|
||||||
Kokkos::fence("Bitset::deep_copy: fence before copy operation");
|
|
||||||
Kokkos::Impl::DeepCopy<typename DstDevice::memory_space,
|
|
||||||
typename SrcDevice::memory_space>(
|
|
||||||
dst.m_blocks.data(), src.m_blocks.data(),
|
|
||||||
sizeof(unsigned) * src.m_blocks.extent(0));
|
|
||||||
Kokkos::fence("Bitset::deep_copy: fence after copy operation");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Kokkos
|
} // namespace Kokkos
|
||||||
|
|||||||
@ -211,6 +211,12 @@ class DualView : public ViewTraits<DataType, Properties...> {
|
|||||||
public:
|
public:
|
||||||
//@}
|
//@}
|
||||||
|
|
||||||
|
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||||
|
public:
|
||||||
|
#else
|
||||||
|
private:
|
||||||
|
#endif
|
||||||
|
|
||||||
// Moved this specifically after modified_flags to resolve an alignment issue
|
// Moved this specifically after modified_flags to resolve an alignment issue
|
||||||
// on MSVC/NVCC
|
// on MSVC/NVCC
|
||||||
//! \name The two View instances.
|
//! \name The two View instances.
|
||||||
@ -219,6 +225,7 @@ class DualView : public ViewTraits<DataType, Properties...> {
|
|||||||
t_host h_view;
|
t_host h_view;
|
||||||
//@}
|
//@}
|
||||||
|
|
||||||
|
public:
|
||||||
//! \name Constructors
|
//! \name Constructors
|
||||||
//@{
|
//@{
|
||||||
|
|
||||||
@ -456,16 +463,21 @@ class DualView : public ViewTraits<DataType, Properties...> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef KOKKOS_COMPILER_INTEL
|
|
||||||
__builtin_unreachable();
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
t_host view_host() const { return h_view; }
|
t_host view_host() const { return h_view; }
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
t_dev view_device() const { return d_view; }
|
t_dev view_device() const { return d_view; }
|
||||||
|
#else
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
const t_host& view_host() const { return h_view; }
|
||||||
|
|
||||||
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
const t_dev& view_device() const { return d_view; }
|
||||||
|
#endif
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
|
KOKKOS_INLINE_FUNCTION constexpr bool is_allocated() const {
|
||||||
return (d_view.is_allocated() && h_view.is_allocated());
|
return (d_view.is_allocated() && h_view.is_allocated());
|
||||||
@ -615,8 +627,8 @@ class DualView : public ViewTraits<DataType, Properties...> {
|
|||||||
impl_report_host_sync();
|
impl_report_host_sync();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if constexpr (std::is_same<typename t_host::memory_space,
|
if constexpr (std::is_same_v<typename t_host::memory_space,
|
||||||
typename t_dev::memory_space>::value) {
|
typename t_dev::memory_space>) {
|
||||||
typename t_dev::execution_space().fence(
|
typename t_dev::execution_space().fence(
|
||||||
"Kokkos::DualView<>::sync: fence after syncing DualView");
|
"Kokkos::DualView<>::sync: fence after syncing DualView");
|
||||||
typename t_host::execution_space().fence(
|
typename t_host::execution_space().fence(
|
||||||
@ -687,8 +699,8 @@ class DualView : public ViewTraits<DataType, Properties...> {
|
|||||||
// deliberately passing args by cref as they're used multiple times
|
// deliberately passing args by cref as they're used multiple times
|
||||||
template <typename... Args>
|
template <typename... Args>
|
||||||
void sync_host_impl(Args const&... args) {
|
void sync_host_impl(Args const&... args) {
|
||||||
if (!std::is_same<typename traits::data_type,
|
if (!std::is_same_v<typename traits::data_type,
|
||||||
typename traits::non_const_data_type>::value)
|
typename traits::non_const_data_type>)
|
||||||
Impl::throw_runtime_exception(
|
Impl::throw_runtime_exception(
|
||||||
"Calling sync_host on a DualView with a const datatype.");
|
"Calling sync_host on a DualView with a const datatype.");
|
||||||
if (modified_flags.data() == nullptr) return;
|
if (modified_flags.data() == nullptr) return;
|
||||||
@ -718,8 +730,8 @@ class DualView : public ViewTraits<DataType, Properties...> {
|
|||||||
// deliberately passing args by cref as they're used multiple times
|
// deliberately passing args by cref as they're used multiple times
|
||||||
template <typename... Args>
|
template <typename... Args>
|
||||||
void sync_device_impl(Args const&... args) {
|
void sync_device_impl(Args const&... args) {
|
||||||
if (!std::is_same<typename traits::data_type,
|
if (!std::is_same_v<typename traits::data_type,
|
||||||
typename traits::non_const_data_type>::value)
|
typename traits::non_const_data_type>)
|
||||||
Impl::throw_runtime_exception(
|
Impl::throw_runtime_exception(
|
||||||
"Calling sync_device on a DualView with a const datatype.");
|
"Calling sync_device on a DualView with a const datatype.");
|
||||||
if (modified_flags.data() == nullptr) return;
|
if (modified_flags.data() == nullptr) return;
|
||||||
@ -1264,10 +1276,10 @@ namespace Kokkos {
|
|||||||
template <class DT, class... DP, class ST, class... SP>
|
template <class DT, class... DP, class ST, class... SP>
|
||||||
void deep_copy(DualView<DT, DP...>& dst, const DualView<ST, SP...>& src) {
|
void deep_copy(DualView<DT, DP...>& dst, const DualView<ST, SP...>& src) {
|
||||||
if (src.need_sync_device()) {
|
if (src.need_sync_device()) {
|
||||||
deep_copy(dst.h_view, src.h_view);
|
deep_copy(dst.view_host(), src.view_host());
|
||||||
dst.modify_host();
|
dst.modify_host();
|
||||||
} else {
|
} else {
|
||||||
deep_copy(dst.d_view, src.d_view);
|
deep_copy(dst.view_device(), src.view_device());
|
||||||
dst.modify_device();
|
dst.modify_device();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1276,10 +1288,10 @@ template <class ExecutionSpace, class DT, class... DP, class ST, class... SP>
|
|||||||
void deep_copy(const ExecutionSpace& exec, DualView<DT, DP...>& dst,
|
void deep_copy(const ExecutionSpace& exec, DualView<DT, DP...>& dst,
|
||||||
const DualView<ST, SP...>& src) {
|
const DualView<ST, SP...>& src) {
|
||||||
if (src.need_sync_device()) {
|
if (src.need_sync_device()) {
|
||||||
deep_copy(exec, dst.h_view, src.h_view);
|
deep_copy(exec, dst.view_host(), src.view_host());
|
||||||
dst.modify_host();
|
dst.modify_host();
|
||||||
} else {
|
} else {
|
||||||
deep_copy(exec, dst.d_view, src.d_view);
|
deep_copy(exec, dst.view_device(), src.view_device());
|
||||||
dst.modify_device();
|
dst.modify_device();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -626,9 +626,8 @@ class DynRankView : private View<DataType*******, Properties...> {
|
|||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
return view_type::operator()(i0, 0, 0, 0, 0, 0, 0);
|
return view_type::operator()(i0, 0, 0, 0, 0, 0, 0);
|
||||||
#if defined KOKKOS_COMPILER_INTEL || \
|
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
!defined(KOKKOS_COMPILER_MSVC)
|
||||||
!defined(KOKKOS_COMPILER_MSVC))
|
|
||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@ -656,9 +655,8 @@ class DynRankView : private View<DataType*******, Properties...> {
|
|||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
return view_type::operator()(i0, i1, 0, 0, 0, 0, 0);
|
return view_type::operator()(i0, i1, 0, 0, 0, 0, 0);
|
||||||
#if defined KOKKOS_COMPILER_INTEL || \
|
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
!defined(KOKKOS_COMPILER_MSVC)
|
||||||
!defined(KOKKOS_COMPILER_MSVC))
|
|
||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@ -690,9 +688,8 @@ class DynRankView : private View<DataType*******, Properties...> {
|
|||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
return view_type::operator()(i0, i1, i2, 0, 0, 0, 0);
|
return view_type::operator()(i0, i1, i2, 0, 0, 0, 0);
|
||||||
#if defined KOKKOS_COMPILER_INTEL || \
|
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
!defined(KOKKOS_COMPILER_MSVC)
|
||||||
!defined(KOKKOS_COMPILER_MSVC))
|
|
||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@ -1124,57 +1121,6 @@ KOKKOS_INLINE_FUNCTION bool operator!=(const DynRankView<LT, LP...>& lhs,
|
|||||||
namespace Kokkos {
|
namespace Kokkos {
|
||||||
namespace Impl {
|
namespace Impl {
|
||||||
|
|
||||||
template <class OutputView, class Enable = void>
|
|
||||||
struct DynRankViewFill {
|
|
||||||
using const_value_type = typename OutputView::traits::const_value_type;
|
|
||||||
|
|
||||||
const OutputView output;
|
|
||||||
const_value_type input;
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
|
||||||
void operator()(const size_t i0) const {
|
|
||||||
const size_t n1 = output.extent(1);
|
|
||||||
const size_t n2 = output.extent(2);
|
|
||||||
const size_t n3 = output.extent(3);
|
|
||||||
const size_t n4 = output.extent(4);
|
|
||||||
const size_t n5 = output.extent(5);
|
|
||||||
const size_t n6 = output.extent(6);
|
|
||||||
|
|
||||||
for (size_t i1 = 0; i1 < n1; ++i1) {
|
|
||||||
for (size_t i2 = 0; i2 < n2; ++i2) {
|
|
||||||
for (size_t i3 = 0; i3 < n3; ++i3) {
|
|
||||||
for (size_t i4 = 0; i4 < n4; ++i4) {
|
|
||||||
for (size_t i5 = 0; i5 < n5; ++i5) {
|
|
||||||
for (size_t i6 = 0; i6 < n6; ++i6) {
|
|
||||||
output.access(i0, i1, i2, i3, i4, i5, i6) = input;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
DynRankViewFill(const OutputView& arg_out, const_value_type& arg_in)
|
|
||||||
: output(arg_out), input(arg_in) {
|
|
||||||
using execution_space = typename OutputView::execution_space;
|
|
||||||
using Policy = Kokkos::RangePolicy<execution_space>;
|
|
||||||
|
|
||||||
Kokkos::parallel_for("Kokkos::DynRankViewFill", Policy(0, output.extent(0)),
|
|
||||||
*this);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <class OutputView>
|
|
||||||
struct DynRankViewFill<OutputView, std::enable_if_t<OutputView::rank == 0>> {
|
|
||||||
DynRankViewFill(const OutputView& dst,
|
|
||||||
const typename OutputView::const_value_type& src) {
|
|
||||||
Kokkos::Impl::DeepCopy<typename OutputView::memory_space,
|
|
||||||
Kokkos::HostSpace>(
|
|
||||||
dst.data(), &src, sizeof(typename OutputView::const_value_type));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <class OutputView, class InputView,
|
template <class OutputView, class InputView,
|
||||||
class ExecSpace = typename OutputView::execution_space>
|
class ExecSpace = typename OutputView::execution_space>
|
||||||
struct DynRankViewRemap {
|
struct DynRankViewRemap {
|
||||||
@ -1521,9 +1467,8 @@ inline auto create_mirror(const DynRankView<T, P...>& src,
|
|||||||
return dst_type(prop_copy,
|
return dst_type(prop_copy,
|
||||||
Impl::reconstructLayout(src.layout(), src.rank()));
|
Impl::reconstructLayout(src.layout(), src.rank()));
|
||||||
}
|
}
|
||||||
#if defined(KOKKOS_COMPILER_INTEL) || \
|
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
!defined(KOKKOS_COMPILER_MSVC)
|
||||||
!defined(KOKKOS_COMPILER_MSVC))
|
|
||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@ -1611,9 +1556,8 @@ inline auto create_mirror_view(
|
|||||||
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
|
return Kokkos::Impl::choose_create_mirror(src, arg_prop);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#if defined(KOKKOS_COMPILER_INTEL) || \
|
#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
||||||
(defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \
|
!defined(KOKKOS_COMPILER_MSVC)
|
||||||
!defined(KOKKOS_COMPILER_MSVC))
|
|
||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@ -1754,6 +1698,7 @@ inline void impl_resize(const Impl::ViewCtorProp<ViewCtorArgs...>& arg_prop,
|
|||||||
Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(
|
Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(
|
||||||
Impl::get_property<Impl::ExecutionSpaceTag>(prop_copy), v_resized, v);
|
Impl::get_property<Impl::ExecutionSpaceTag>(prop_copy), v_resized, v);
|
||||||
else {
|
else {
|
||||||
|
// NOLINTNEXTLINE(bugprone-unused-raii)
|
||||||
Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(v_resized, v);
|
Kokkos::Impl::DynRankViewRemap<drview_type, drview_type>(v_resized, v);
|
||||||
Kokkos::fence("Kokkos::resize(DynRankView)");
|
Kokkos::fence("Kokkos::resize(DynRankView)");
|
||||||
}
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user