diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5016d9a644..6d1bcee2db 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -71,6 +71,7 @@ src/EXTRA-COMMAND/group_ndx.* @akohlmey src/EXTRA-COMMAND/ndx_group.* @akohlmey src/EXTRA-COMPUTE/compute_stress_mop*.* @RomainVermorel src/EXTRA-COMPUTE/compute_born_matrix.* @Bibobu @athomps +src/EXTRA-DUMP/dump_extxyz.* @fxcoudert src/EXTRA-FIX/fix_deform_pressure.* @jtclemm src/EXTRA-PAIR/pair_dispersion_d3.* @soniasolomoni @arthurfl src/EXTRA-PAIR/d3_parameters.h @soniasolomoni @arthurfl diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 2731b0df14..f878db654c 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -57,8 +57,8 @@ if(DOWNLOAD_KOKKOS) list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") include(ExternalProject) - set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.5.01.tar.gz" CACHE STRING "URL for KOKKOS tarball") - set(KOKKOS_MD5 "4d832aa0284169d9e3fbae3165286bc6" CACHE STRING "MD5 checksum of KOKKOS tarball") + set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.6.00.tar.gz" CACHE STRING "URL for KOKKOS tarball") + set(KOKKOS_MD5 "61b2b69ae50d83eedcc7d47a3fa3d6cb" CACHE STRING "MD5 checksum of KOKKOS tarball") mark_as_advanced(KOKKOS_URL) mark_as_advanced(KOKKOS_MD5) GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK) @@ -83,7 +83,7 @@ if(DOWNLOAD_KOKKOS) add_dependencies(LAMMPS::KOKKOSCORE kokkos_build) add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build) elseif(EXTERNAL_KOKKOS) - find_package(Kokkos 4.5.01 REQUIRED CONFIG) + find_package(Kokkos 4.6.00 REQUIRED CONFIG) target_link_libraries(lammps PRIVATE Kokkos::kokkos) else() set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index ca64ffbe2e..26cf776f4d 100644 
--- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -611,6 +611,9 @@ They must be specified in uppercase. * - ZEN3 - HOST - AMD Zen3 architecture + * - ZEN4 + - HOST + - AMD Zen4 architecture * - RISCV_SG2042 - HOST - SG2042 (RISC-V) CPUs @@ -714,7 +717,7 @@ They must be specified in uppercase. - GPU - Intel GPU Ponte Vecchio -This list was last updated for version 4.5.1 of the Kokkos library. +This list was last updated for version 4.6.0 of the Kokkos library. .. tabs:: diff --git a/doc/src/Commands_bond.rst b/doc/src/Commands_bond.rst index 05c505b5ee..3ac828670c 100644 --- a/doc/src/Commands_bond.rst +++ b/doc/src/Commands_bond.rst @@ -128,7 +128,7 @@ OPT. * :doc:`harmonic (iko) ` * :doc:`helix (o) ` * :doc:`lepton (o) ` - * :doc:`multi/harmonic (o) ` + * :doc:`multi/harmonic (ko) ` * :doc:`nharmonic (o) ` * :doc:`opls (iko) ` * :doc:`quadratic (o) ` diff --git a/doc/src/Commands_dump.rst b/doc/src/Commands_dump.rst index 86dab8b731..c938937755 100644 --- a/doc/src/Commands_dump.rst +++ b/doc/src/Commands_dump.rst @@ -19,6 +19,7 @@ An alphabetic list of all LAMMPS :doc:`dump ` commands. * :doc:`custom/gz ` * :doc:`custom/zstd ` * :doc:`dcd ` + * :doc:`extxyz ` * :doc:`grid ` * :doc:`grid/vtk ` * :doc:`h5md ` diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst index ed9bf6429b..35c3804969 100644 --- a/doc/src/Commands_fix.rst +++ b/doc/src/Commands_fix.rst @@ -186,6 +186,7 @@ OPT. 
* :doc:`qeq/fire ` * :doc:`qeq/point ` * :doc:`qeq/reaxff (ko) ` + * :doc:`qeq/rel/reaxff ` * :doc:`qeq/shielded ` * :doc:`qeq/slater ` * :doc:`qmmm ` diff --git a/doc/src/dihedral_multi_harmonic.rst b/doc/src/dihedral_multi_harmonic.rst index 176d3815dc..5f6f840d61 100644 --- a/doc/src/dihedral_multi_harmonic.rst +++ b/doc/src/dihedral_multi_harmonic.rst @@ -4,7 +4,7 @@ dihedral_style multi/harmonic command ===================================== -Accelerator Variants: *multi/harmonic/omp* +Accelerator Variants: *multi/harmonic/kk*, *multi/harmonic/omp* Syntax """""" diff --git a/doc/src/dump.rst b/doc/src/dump.rst index a8175fa612..557d7b11bb 100644 --- a/doc/src/dump.rst +++ b/doc/src/dump.rst @@ -3,6 +3,7 @@ .. index:: dump cfg .. index:: dump custom .. index:: dump dcd +.. index:: dump extxyz .. index:: dump grid .. index:: dump grid/vtk .. index:: dump local @@ -59,7 +60,7 @@ Syntax * ID = user-assigned name for the dump * group-ID = ID of the group of atoms to be dumped -* style = *atom* or *atom/adios* or *atom/gz* or *atom/zstd* or *cfg* or *cfg/gz* or *cfg/zstd* or *cfg/uef* or *custom* or *custom/gz* or *custom/zstd* or *custom/adios* or *dcd* or *grid* or *grid/vtk* or *h5md* or *image* or *local* or *local/gz* or *local/zstd* or *molfile* or *movie* or *netcdf* or *netcdf/mpiio* or *vtk* or *xtc* or *xyz* or *xyz/gz* or *xyz/zstd* or *yaml* +* style = *atom* or *atom/adios* or *atom/gz* or *atom/zstd* or *cfg* or *cfg/gz* or *cfg/zstd* or *cfg/uef* or *custom* or *custom/gz* or *custom/zstd* or *custom/adios* or *dcd* or *extxyz* or *grid* or *grid/vtk* or *h5md* or *image* or *local* or *local/gz* or *local/zstd* or *molfile* or *movie* or *netcdf* or *netcdf/mpiio* or *vtk* or *xtc* or *xyz* or *xyz/gz* or *xyz/zstd* or *yaml* * N = dump on timesteps which are multiples of N * file = name of file to write dump info to * attribute1,attribute2,... 
= list of attributes for a particular style @@ -77,6 +78,7 @@ Syntax *custom*, *custom/gz*, *custom/zstd* attributes = see below *custom/adios* attributes = same as *custom* attributes, discussed on :doc:`dump custom/adios ` page *dcd* attributes = none + *extxyz* attributes = none *h5md* attributes = discussed on :doc:`dump h5md ` page *grid* attributes = see below *grid/vtk* attributes = see below @@ -242,28 +244,29 @@ all the processors or multiple smaller files. frames consistently to the same atom. This can lead to incorrect visualizations or results. LAMMPS will print a warning in such cases. -For the *atom*, *custom*, *cfg*, *grid*, and *local* styles, sorting -is off by default. For the *dcd*, *grid/vtk*, *xtc*, *xyz*, and +For the *atom*, *custom*, *cfg*, *grid*, and *local* styles, sorting is +off by default. For the *dcd*, *extxyz*, *grid/vtk*, *xtc*, *xyz*, and *molfile* styles, sorting by atom ID or grid ID is on by default. See the :doc:`dump_modify ` page for details. The *style* keyword determines what kind of data is written to the dump file(s) and in what format. -Note that *atom*, *custom*, *dcd*, *xtc*, *xyz*, and *yaml* style dump -files can be read directly by `VMD `_, -a popular tool for visualizing and analyzing trajectories from atomic -and molecular systems. For reading *netcdf* style dump files, the -netcdf plugin needs to be recompiled from source using a NetCDF version -compatible with the one used by LAMMPS. The bundled plugin binary -uses a very old version of NetCDF that is not compatible with LAMMPS. +Note that *atom*, *custom*, *dcd*, *extxyz*, *xtc*, *xyz*, and *yaml* +style dump files can be read directly by `VMD +`_, a popular tool for visualizing +and analyzing trajectories from atomic and molecular systems. For +reading *netcdf* style dump files, the netcdf plugin needs to be +recompiled from source using a NetCDF version compatible with the one +used by LAMMPS. 
The bundled plugin binary uses a very old version of +NetCDF that is not compatible with LAMMPS. Likewise the `OVITO visualization package `_, -popular for materials modeling, can read the *atom*, *custom*, +popular for materials modeling, can read the *atom*, *custom*, *extxyz*, *local*, *xtc*, *cfg*, *netcdf*, and *xyz* style atom dump files -directly. With version 3.8 and above, OVITO can also read and -visualize *grid* style dump files with grid cell data, including -iso-surface images of the grid cell values. +directly. With version 3.8 and above, OVITO can also read and visualize +*grid* style dump files with grid cell data, including iso-surface +images of the grid cell values. Note that settings made via the :doc:`dump_modify ` command can also alter the format of individual values and content of @@ -475,6 +478,24 @@ label). This option will help many visualization programs to guess bonds and colors. You can use the :doc:`dump_modify types labels ` option to replace numeric atom types with :doc:`type labels `. +.. versionadded:: TBD + +The *extxyz* style writes XYZ files compatible with the Extended XYZ (or +ExtXYZ) format as defined in `the libAtoms specification +`_. Specifically, the following +information will be dumped: + +* timestep +* time, which can be disabled with :doc:`dump_modify time no ` +* simulation box lattice and pbc conditions +* atomic forces, which can be disabled with :doc:`dump_modify forces no ` +* atomic velocities, which can be disabled with :doc:`dump_modify vel no ` +* atomic masses, if enabled with :doc:`dump_modify mass yes ` + +Dump style *extxyz* requires either that a :doc:`type label map for atom types +` is defined or :doc:`dump_modify element ` is used to +set up an atom type number to atom name mapping. + .. versionadded:: 22Dec2022 The *grid/vtk* style writes VTK files for grid data on a regular @@ -607,8 +628,8 @@ with the processor ID from :math:`0` to :math:`P-1`. 
For example, tmp.dump.% becomes tmp.dump.0, tmp.dump.1, ... tmp.dump.:math:`P-1`, etc. This creates smaller files and can be a fast mode of output on parallel machines that support parallel I/O for output. This option is -**not** available for the *dcd*, *xtc*, *xyz*, *grid/vtk*, and *yaml* -styles. +**not** available for the *dcd*, *extxyz*, *xtc*, *xyz*, *grid/vtk*, and +*yaml* styles. By default, :math:`P` is the the number of processors, meaning one file per processor, but :math:`P` can be set to a smaller value via the *nfile* or @@ -1017,9 +1038,9 @@ the COMPRESS package. They are only enabled if LAMMPS was built with that package. See the :doc:`Build package ` page for more info. -The *xtc*, *dcd*, and *yaml* styles are part of the EXTRA-DUMP package. -They are only enabled if LAMMPS was built with that package. See the -:doc:`Build package ` page for more info. +The *dcd*, *extxyz*, *xtc*, and *yaml* styles are part of the EXTRA-DUMP +package. They are only enabled if LAMMPS was built with that package. +See the :doc:`Build package ` page for more info. Related commands """""""""""""""" diff --git a/doc/src/dump_modify.rst b/doc/src/dump_modify.rst index 1f2b1c8e0e..4786ea3c8f 100644 --- a/doc/src/dump_modify.rst +++ b/doc/src/dump_modify.rst @@ -92,6 +92,15 @@ Syntax see the :doc:`dump image ` doc page for details +* these keywords apply only to the extxyz dump style +* keyword = *forces* or *mass* or *vel* + + .. 
parsed-literal:: + + *forces* arg = *yes* or *no* + *mass* arg = *yes* or *no* + *vel* arg = *yes* or *no* + * these keywords apply only to the */gz* and */zstd* dump styles * keyword = *compression_level* @@ -972,9 +981,11 @@ The option defaults are * fileper = # of processors * first = no * flush = yes +* forces = yes * format = %d and %g for each integer or floating point value * image = no * label = ENTRIES +* mass = no * maxfiles = -1 * nfile = 1 * pad = 0 @@ -990,6 +1001,7 @@ The option defaults are * types = numeric * units = no * unwrap = no +* vel = yes * compression_level = 9 (gz variants) * compression_level = 0 (zstd variants) diff --git a/doc/src/fix.rst b/doc/src/fix.rst index f024fc6974..3b1bc4a75c 100644 --- a/doc/src/fix.rst +++ b/doc/src/fix.rst @@ -365,6 +365,7 @@ accelerated styles exist. * :doc:`qeq/fire ` - charge equilibration via FIRE minimizer * :doc:`qeq/point ` - charge equilibration via point method * :doc:`qeq/reaxff ` - charge equilibration for ReaxFF potential +* :doc:`qeq/rel/reaxff ` - charge equilibration for ReaxFF potential with alternate efield implementation * :doc:`qeq/shielded ` - charge equilibration via shielded method * :doc:`qeq/slater ` - charge equilibration via Slater method * :doc:`qmmm ` - functionality to enable a quantum mechanics/molecular mechanics coupling diff --git a/doc/src/fix_acks2_reaxff.rst b/doc/src/fix_acks2_reaxff.rst index 79a9cf8ea6..c198ae8a08 100644 --- a/doc/src/fix_acks2_reaxff.rst +++ b/doc/src/fix_acks2_reaxff.rst @@ -123,8 +123,10 @@ components in non-periodic directions. 
Related commands """""""""""""""" -:doc:`pair_style reaxff `, :doc:`fix qeq/reaxff `, -:doc:`fix qtpi/reaxff ` +:doc:`pair_style reaxff `, +:doc:`fix qeq/reaxff `, +:doc:`fix qtpie/reaxff `, +:doc:`fix qeq/rel/reaxff ` Default """"""" diff --git a/doc/src/fix_adapt.rst b/doc/src/fix_adapt.rst index 2b39917d86..a6c07fad4d 100644 --- a/doc/src/fix_adapt.rst +++ b/doc/src/fix_adapt.rst @@ -14,7 +14,7 @@ Syntax * adapt = style name of this fix command * N = adapt simulation settings every this many timesteps * one or more attribute/arg pairs may be appended -* attribute = *pair* or *bond* or *angle* or *kspace* or *atom* +* attribute = *pair* or *bond* or *angle* or *improper* or *kspace* or *atom* .. parsed-literal:: @@ -33,6 +33,11 @@ Syntax aparam = parameter to adapt over time I = type angle to set parameter for (integer or type label) v_name = variable with name that calculates value of aparam + *improper* args = istyle iparam I v_name + istyle = improper style name (e.g., cvff) + iparam = parameter to adapt over time + I = type improper to set parameter for (integer or type label) + v_name = variable with name that calculates value of iparam *kspace* arg = v_name v_name = variable with name that calculates scale factor on :math:`k`-space terms *atom* args = atomparam v_name @@ -428,6 +433,56 @@ this fix uses to reset theta0 needs to generate values in radians. ---------- +.. versionadded:: TBD + +The *improper* keyword uses the specified variable to change the value of +an improper coefficient over time, very similar to how the *angle* keyword +operates. The only difference is that now an improper coefficient for a +given improper type is adapted. + +A wild-card asterisk can be used in place of or in conjunction with the +improper type argument to set the coefficients for multiple improper types. +This takes the form "\*" or "\*n" or "m\*" or "m\*n". 
If :math:`N` is +the number of improper types, then an asterisk with no numeric values means +all types from 1 to :math:`N`. A leading asterisk means all types from +1 to n (inclusive). A trailing asterisk means all types from m to +:math:`N` (inclusive). A middle asterisk means all types from m to n +(inclusive). + +If :doc:`improper_style hybrid ` is used, *istyle* should be a +sub-style name. The improper styles that currently work with fix adapt are: + ++---------------------------------------------------------+----------------+----------------+ +| :doc:`amoeba ` | k | type impropers | ++---------------------------------------------------------+----------------+----------------+ +| :doc:`class2 ` | k,chi0 | type impropers | ++---------------------------------------------------------+----------------+----------------+ +| :doc:`cossq ` | k,chi0 | type impropers | ++---------------------------------------------------------+----------------+----------------+ +| :doc:`cvff ` | k,d,n | type impropers | ++---------------------------------------------------------+----------------+----------------+ +| :doc:`distance ` | k2,k4 | type impropers | ++---------------------------------------------------------+----------------+----------------+ +| :doc:`distharm ` | k,d0 | type impropers | ++---------------------------------------------------------+----------------+----------------+ +| :doc:`fourier ` | k,C0,C1,C2 | type impropers | ++---------------------------------------------------------+----------------+----------------+ +| :doc:`harmonic ` | k,chi0 | type impropers | ++---------------------------------------------------------+----------------+----------------+ +| :doc:`inversion/harmonic ` | k,w0 | type impropers | ++---------------------------------------------------------+----------------+----------------+ +| :doc:`ring ` | k,theta0 | type impropers | ++---------------------------------------------------------+----------------+----------------+ +| :doc:`umbrella ` | 
k,w0 | type impropers | ++---------------------------------------------------------+----------------+----------------+ +| :doc:`sqdistharm ` | k | type impropers | ++---------------------------------------------------------+----------------+----------------+ + +Note that internally, chi0 and theta0 are stored in radians, so the variable +this fix uses to reset chi0 or theta0 needs to generate values in radians. + +---------- + The *kspace* keyword used the specified variable as a scale factor on the energy, forces, virial calculated by whatever :math:`k`-space solver is defined by the :doc:`kspace_style ` command. If the diff --git a/doc/src/fix_qeq_reaxff.rst b/doc/src/fix_qeq_reaxff.rst index e1a09c4fc3..7441138396 100644 --- a/doc/src/fix_qeq_reaxff.rst +++ b/doc/src/fix_qeq_reaxff.rst @@ -59,7 +59,7 @@ extracted from the :doc:`pair_style reaxff ` command and the ReaxFF force field file it reads in. If a file name is specified for *params*, then the parameters are taken from the specified file and the file must contain one line for each atom type. The latter -form must be used when performing QeQ with a non-ReaxFF potential. +form must be used when performing QEq with a non-ReaxFF potential. Each line should be formatted as follows: .. parsed-literal:: @@ -140,7 +140,8 @@ Related commands """""""""""""""" :doc:`pair_style reaxff `, :doc:`fix qeq/shielded `, -:doc:`fix acks2/reaxff `, :doc:`fix qtpie/reaxff ` +:doc:`fix acks2/reaxff `, :doc:`fix qtpie/reaxff `, +:doc:`fix qeq/rel/reaxff ` Default """"""" diff --git a/doc/src/fix_qeq_rel_reaxff.rst b/doc/src/fix_qeq_rel_reaxff.rst new file mode 100644 index 0000000000..012980e230 --- /dev/null +++ b/doc/src/fix_qeq_rel_reaxff.rst @@ -0,0 +1,195 @@ +.. index:: fix qeq/rel/reaxff + +fix qeq/rel/reaxff command +========================== + +Syntax +"""""" + +.. 
code-block:: LAMMPS + + fix ID group-ID qeq/rel/reaxff Nevery cutlo cuthi tolerance params gfile args + +* ID, group-ID are documented in :doc:`fix ` command +* qeq/rel/reaxff = style name of this fix command +* Nevery = perform QEqR every this many steps +* cutlo,cuthi = lo and hi cutoff for Taper radius +* tolerance = precision to which charges will be equilibrated +* params = reaxff or a filename +* gfile = the name of a file containing Gaussian orbital exponents +* one or more keywords or keyword/value pairs may be appended + + .. parsed-literal:: + + keyword = *scale* or *maxiter* or *nowarn* + *scale* beta = set value of scaling factor *beta* (determines strength of electric polarization) + *maxiter* N = limit the number of iterations to *N* + *nowarn* = do not print a warning message if the maximum number of iterations is reached + +Examples +"""""""" + +.. code-block:: LAMMPS + + fix 1 all qeq/rel/reaxff 1 0.0 10.0 1.0e-6 reaxff exp.qeqr + fix 1 all qeq/rel/reaxff 1 0.0 10.0 1.0e-6 params.qeqr exp.qeqr scale 1.5 maxiter 500 nowarn + +Description +""""""""""" + +.. versionadded:: 19Nov2024 + +This fix implements the QEqR method for charge equilibration, which +differs from the QEq charge equilibration method :ref:`(Rappe and +Goddard) ` only in how external electric fields are accounted +for. This fix therefore raises a warning when used without :doc:`fix +efield ` since :doc:`fix qeq/reaxff ` should +be used without an external electric field. Charges are computed with +the QEqR method by minimizing the electrostatic energy of the system in +the same way as the QEq method but where the absolute electronegativity, +:math:`\chi_i`, of each atom in the QEq method is replaced with an +effective electronegativity given by + +.. 
math:: + \chi_{\mathrm{r}i} = \chi_i + \frac{\sum_{j=1}^{N} \beta(\phi_i - \phi_j) S_{ij}} + {\sum_{m=1}^{N}S_{im}}, + +where :math:`N` is the number of atoms in the system, :math:`\beta` is a +scaling factor, :math:`\phi_i` and :math:`\phi_j` are the electric +potentials at the positions of atoms :math:`i` and :math:`j` due to the +external electric field and :math:`S_{ij}` is the overlap integral +between atoms :math:`i` and :math:`j`. This formulation is advantageous +over the method used by :doc:`fix qeq/reaxff ` to +account for an external electric field in that it permits periodic +boundaries in the direction of an external electric field and in that it +does not worsen long-range charge transfer seen with QEq. + +This fix is typically used in conjunction with the ReaxFF force field +model as implemented in the :doc:`pair_style reaxff ` +command, but it can be used with any potential in LAMMPS, so long as it +defines and uses charges on each atom. For more technical details about +the charge equilibration performed by *fix qeq/rel/reaxff*, which is the +same as in :doc:`fix qeq/reaxff ` except for the use of +:math:`\chi_{\mathrm{r}i}`, please refer to :ref:`(Aktulga) +`. To be explicit, *fix qeq/rel/reaxff* replaces +:math:`\chi_k` of eq. 3 in :ref:`(Aktulga) ` with +:math:`\chi_{\mathrm{r}k}` when an external electric field is applied. + +This fix requires the absolute electronegativity, :math:`\chi`, in eV, +the self-Coulomb potential, :math:`\eta`, in eV, and the shielded +Coulomb constant, :math:`\gamma`, in :math:`\AA^{-1}`. If the *params* +setting above is the word "reaxff", then these are extracted from the +:doc:`pair_style reaxff ` command and the ReaxFF force +field file it reads in. If a file name is specified for *params*, then +the parameters are taken from the specified file and the file must +contain one line for each atom type. The latter form must be used when +using this fix with a non-ReaxFF potential. 
Each line should be +formatted as follows, ensuring that the parameters are given in units of +eV, eV, and :math:`\AA^{-1}`, respectively: + +.. parsed-literal:: + + itype chi eta gamma + +where *itype* is the atom type from 1 to Ntypes. Note that eta is +defined here as twice the eta value in the ReaxFF file. + +The overlap integrals :math:`S_{ij}` are computed by using normalized 1s +Gaussian type orbitals. The Gaussian orbital exponents, :math:`\alpha`, +that are needed to compute the overlap integrals are taken from the file +given by *gfile*. This file must contain one line for each atom type +and provide the Gaussian orbital exponent for each atom type in units of +inverse square Bohr radius. Each line should be formatted as follows: + +.. parsed-literal:: + + itype alpha + +Empty lines or any text following the pound sign (#) are ignored. An +example *gfile* for a system with two atom types is + +.. parsed-literal:: + + # An example gfile. Exponents are taken from Table 2.2 of Chen, J. (2009). + # Theory and applications of fluctuating-charge models. + # The units of the exponents are 1 / (Bohr radius)^2 . + 1 0.2240 # O + 2 0.5434 # H + +The optional *scale* keyword sets the value of :math:`\beta` in the +equation for :math:`\chi_{\mathrm{r}i}`. The default value is 1.0. + +The optional *maxiter* keyword allows changing the max number of +iterations in the linear solver. The default value is 200. + +The optional *nowarn* keyword silences the warning message printed when +the maximum number of iterations is reached. This can be useful for +comparing serial and parallel results where having the same fixed number +of iterations is desired, which can be achieved by using a very small +tolerance and setting *maxiter* to the desired number of iterations. + +.. note:: + + In order to solve the self-consistent equations for electronegativity + equalization, LAMMPS imposes the additional constraint that all the + charges in the fix group must add up to zero. 
The initial charge + assignments should also satisfy this constraint. LAMMPS will print a + warning if that is not the case. + +Restart, fix_modify, output, run start/stop, minimize info +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +No information about this fix is written to :doc:`binary restart files +`. This fix computes a global scalar (the number of +iterations) and a per-atom vector (the effective electronegativity), +which can be accessed by various :doc:`output commands `. +No parameter of this fix can be used with the *start/stop* keywords of +the :doc:`run ` command. + +This fix is invoked during :doc:`energy minimization `. + +Restrictions +"""""""""""" + +This fix is part of the REAXFF package. It is only enabled if LAMMPS +was built with that package. See the :doc:`Build package +` page for more info. + +This fix does not correctly handle interactions involving multiple +periodic images of the same atom. Hence, it should not be used for +periodic cell dimensions smaller than the non-bonded cutoff radius, +which is typically :math:`10~\AA` for ReaxFF simulations. + +This fix may be used in combination with :doc:`fix efield ` +and will apply the external electric field during charge equilibration, +but there may be only one fix efield instance used and the electric +field must be applied to all atoms in the system. Consequently, `fix +efield` must be used with *group-ID* all and must not be used with the +keyword *region*. Equal-style variables can be used for electric field +vector components without any further settings. Atom-style variables can +be used for spatially-varying electric field vector components, but the +resulting electric potential must be specified as an atom-style variable +using the *potential* keyword for `fix efield`. 
+ +Related commands +"""""""""""""""" + +:doc:`pair_style reaxff `, :doc:`fix qeq/reaxff `, +:doc:`fix acks2/reaxff `, :doc:`fix qtpie/reaxff ` + +Default +""""""" + +scale = 1.0 and maxiter = 200 + +---------- + +.. _Rappe4: + +**(Rappe)** Rappe and Goddard III, Journal of Physical Chemistry, 95, +3358-3363 (1991). + +.. _qeq-Aktulga3: + +**(Aktulga)** Aktulga, Fogarty, Pandit, Grama, Parallel Computing, 38, +245-259 (2012). diff --git a/doc/src/fix_qtpie_reaxff.rst b/doc/src/fix_qtpie_reaxff.rst index e96cbec459..08ae24e164 100644 --- a/doc/src/fix_qtpie_reaxff.rst +++ b/doc/src/fix_qtpie_reaxff.rst @@ -21,8 +21,10 @@ Syntax .. parsed-literal:: - keyword = *maxiter* + keyword = *scale* or *maxiter* or *nowarn* + *scale* beta = set value of scaling factor *beta* (determines strength of electric polarization) *maxiter* N = limit the number of iterations to *N* + *nowarn* = do not print a warning message if the maximum number of iterations is reached Examples """""""" @@ -30,7 +32,7 @@ Examples .. code-block:: LAMMPS fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 reaxff exp.qtpie - fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 params.qtpie exp.qtpie maxiter 500 + fix 1 all qtpie/reaxff 1 0.0 10.0 1.0e-6 params.qtpie exp.qtpie scale 1.5 maxiter 500 nowarn Description """"""""""" @@ -46,7 +48,7 @@ same way as the QEq method but where the absolute electronegativity, electronegativity given by :ref:`(Chen) ` .. math:: - \chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j) S_{ij}} + \tilde{\chi}_{i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j) S_{ij}} {\sum_{m=1}^{N}S_{im}}, which acts to penalize long-range charge transfer seen with the QEq charge @@ -61,11 +63,11 @@ electric field by using the effective electronegativity given in :ref:`(Gergs) `: .. 
math:: - \chi_{\mathrm{eff},i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \phi_i - \phi_j) S_{ij}} + \tilde{\chi}_{\mathrm{r}i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \beta(\phi_i - \phi_j)) S_{ij}} {\sum_{m=1}^{N}S_{im}}, -where :math:`\phi_i` and :math:`\phi_j` are the electric -potentials at the positions of atom :math:`i` and :math:`j` +where :math:`\beta` is a scaling factor and :math:`\phi_i` and :math:`\phi_j` +are the electric potentials at the positions of atoms :math:`i` and :math:`j` due to the external electric field. This fix is typically used in conjunction with the ReaxFF force @@ -74,9 +76,12 @@ command, but it can be used with any potential in LAMMPS, so long as it defines and uses charges on each atom. For more technical details about the charge equilibration performed by `fix qtpie/reaxff`, which is the same as in :doc:`fix qeq/reaxff ` except for the use of -:math:`\chi_{\mathrm{eff},i}`, please refer to :ref:`(Aktulga) `. +:math:`\tilde{\chi}_{i}` or :math:`\tilde{\chi}_{\mathrm{r}i}`, +please refer to :ref:`(Aktulga) `. To be explicit, this fix replaces :math:`\chi_k` of eq. 3 in -:ref:`(Aktulga) ` with :math:`\chi_{\mathrm{eff},k}`. +:ref:`(Aktulga) ` with :math:`\tilde{\chi}_{k}` when no external +electric field is applied and with :math:`\tilde{\chi}_{\mathrm{r}k}` when an +external electric field is applied. This fix requires the absolute electronegativity, :math:`\chi`, in eV, the self-Coulomb potential, :math:`\eta`, in eV, and the shielded Coulomb @@ -97,7 +102,7 @@ respectively: where *itype* is the atom type from 1 to Ntypes. Note that eta is defined here as twice the eta value in the ReaxFF file. -The overlap integrals in the equation for :math:`\chi_{\mathrm{eff},i}` +The overlap integrals :math:`S_{ij}` are computed by using normalized 1s Gaussian type orbitals. The Gaussian orbital exponents, :math:`\alpha`, that are needed to compute the overlap integrals are taken from the file given by *gfile*. 
@@ -120,15 +125,26 @@ Empty lines or any text following the pound sign (#) are ignored. An example 1 0.2240 # O 2 0.5434 # H +The optional *scale* keyword sets the value of :math:`\beta` in the equation for +:math:`\tilde{\chi}_{\mathrm{r}i}`. This keyword only affects the computed charges +when :doc:`fix efield ` is used. The default value is 1.0. + The optional *maxiter* keyword allows changing the max number of iterations in the linear solver. The default value is 200. +The optional *nowarn* keyword silences the warning message printed +when the maximum number of iterations is reached. This can be +useful for comparing serial and parallel results where having the +same fixed number of iterations is desired, which can be achieved +by using a very small tolerance and setting *maxiter* to the desired +number of iterations. + .. note:: In order to solve the self-consistent equations for electronegativity equalization, LAMMPS imposes the additional constraint that all the - charges in the fix group must add up to zero. The initial charge - assignments should also satisfy this constraint. LAMMPS will print a + charges in the fix group must add up to zero. The initial charge + assignments should also satisfy this constraint. LAMMPS will print a warning if that is not the case. 
Restart, fix_modify, output, run start/stop, minimize info @@ -170,12 +186,13 @@ Related commands """""""""""""""" :doc:`pair_style reaxff `, :doc:`fix qeq/reaxff `, -:doc:`fix acks2/reaxff ` +:doc:`fix acks2/reaxff `, +:doc:`fix qeq/rel/reaxff ` Default """"""" -maxiter 200 +scale = 1.0 and maxiter = 200 ---------- diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index 7d78b475cd..577d2c23de 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -725,6 +725,7 @@ dashpot dat datafile datatype +dataset datums Davidchack Daw @@ -3120,9 +3121,11 @@ qE qeff qelectron qeq +qeqr Qamar QeQ QEq +QEqR qfactor qfile qi diff --git a/examples/reaxff/water/in.water.qeqr b/examples/reaxff/water/in.water.qeqr new file mode 100644 index 0000000000..6debe0b895 --- /dev/null +++ b/examples/reaxff/water/in.water.qeqr @@ -0,0 +1,29 @@ +# Water with QEqR + +boundary p p p +units real +atom_style charge + +read_data data.water + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 diff --git a/examples/reaxff/water/in.water.qeqr.field b/examples/reaxff/water/in.water.qeqr.field new file mode 100644 index 0000000000..9c61477ff7 --- /dev/null +++ b/examples/reaxff/water/in.water.qeqr.field @@ -0,0 +1,30 @@ +# Water with QEqR + +boundary p p p +units real +atom_style charge + +read_data data.water + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +neighbor 0.5 bin 
+neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 +fix 3 all efield 0.0 0.0 0.05 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 diff --git a/examples/reaxff/water/log.20Mar25.reaxff.water-qeqr-field.g++.1 b/examples/reaxff/water/log.20Mar25.reaxff.water-qeqr-field.g++.1 new file mode 100644 index 0000000000..7f4c84d0f0 --- /dev/null +++ b/examples/reaxff/water/log.20Mar25.reaxff.water-qeqr-field.g++.1 @@ -0,0 +1,115 @@ +LAMMPS (4 Feb 2025 - Development - patch_4Feb2025-444-gbb8b6590d5-modified) + using 1 OpenMP thread(s) per MPI task +# Water with QEqR + +boundary p p p +units real +atom_style charge + +read_data data.water +Reading data file ... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 3000 atoms + read_data CPU = 0.053 seconds + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z +replicate 1 $y $z +replicate 1 1 $z +replicate 1 1 1 +Replication is creating a 1x1x1 = 1 times larger system... 
+ orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 1 by 1 MPI processor grid + 3000 atoms + replicate CPU = 0.001 seconds + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:300) +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 +fix 3 all efield 0.0 0.0 0.05 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.5 + ghost atom cutoff = 10.5 + binsize = 5.25, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/ghost/newtoff + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeqr/reaxff, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 539.2 | 539.2 | 539.2 Mbytes + Step Temp Press Density Volume + 0 300 778.75601 1 29915.273 + 10 301.42845 5423.6612 1 29915.273 + 20 298.24707 1549.2257 1 29915.273 +Loop time of 10.6859 on 1 procs for 20 steps with 3000 atoms + +Performance: 0.081 ns/day, 296.830 hours/ns, 1.872 timesteps/s, 5.615 katom-step/s +100.0% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 4.7595 | 4.7595 | 4.7595 | 0.0 | 44.54 +Neigh | 0.17605 | 0.17605 | 0.17605 | 0.0 | 1.65 +Comm | 0.0017511 | 0.0017511 | 0.0017511 | 0.0 | 0.02 +Output | 8.3809e-05 | 8.3809e-05 | 8.3809e-05 | 0.0 | 0.00 +Modify | 5.748 | 5.748 | 5.748 | 0.0 | 53.79 +Other | | 0.0005279 | | | 0.00 + +Nlocal: 3000 ave 3000 max 3000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 11075 ave 11075 max 11075 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 971785 ave 971785 max 971785 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 971785 +Ave neighs/atom = 323.92833 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:12 diff --git a/examples/reaxff/water/log.20Mar25.reaxff.water-qeqr-field.g++.4 b/examples/reaxff/water/log.20Mar25.reaxff.water-qeqr-field.g++.4 new file mode 100644 index 0000000000..722609d9bf --- /dev/null +++ 
b/examples/reaxff/water/log.20Mar25.reaxff.water-qeqr-field.g++.4 @@ -0,0 +1,115 @@ +LAMMPS (4 Feb 2025 - Development - patch_4Feb2025-444-gbb8b6590d5-modified) + using 1 OpenMP thread(s) per MPI task +# Water with QEqR + +boundary p p p +units real +atom_style charge + +read_data data.water +Reading data file ... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 3000 atoms + read_data CPU = 0.053 seconds + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z +replicate 1 $y $z +replicate 1 1 $z +replicate 1 1 1 +Replication is creating a 1x1x1 = 1 times larger system... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 2 by 2 MPI processor grid + 3000 atoms + replicate CPU = 0.002 seconds + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:300) +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 +fix 3 all efield 0.0 0.0 0.05 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.5 + ghost atom cutoff = 10.5 + binsize = 5.25, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/ghost/newtoff + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeqr/reaxff, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 260.5 | 262.2 | 263.6 Mbytes + Step Temp Press Density Volume + 0 300 778.75601 1 29915.273 + 10 301.42845 5423.6623 1 29915.273 + 20 298.24708 1549.2264 1 29915.273 +Loop time of 3.10467 on 4 procs for 20 steps with 3000 atoms + +Performance: 0.278 ns/day, 86.241 hours/ns, 6.442 timesteps/s, 19.326 katom-step/s +99.6% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.668 | 1.6843 | 1.7266 | 1.9 | 54.25 +Neigh | 0.08549 | 0.086004 | 0.086638 | 0.2 | 2.77 +Comm | 0.0135 | 0.055821 | 0.072105 | 10.4 | 1.80 +Output | 4.9632e-05 | 5.4515e-05 | 6.8384e-05 | 0.0 | 0.00 +Modify | 1.2774 | 1.2781 | 1.2786 | 0.0 | 41.17 +Other | | 0.000458 | | | 0.01 + +Nlocal: 750 ave 760 max 735 min +Histogram: 1 0 0 0 1 0 0 0 0 2 +Nghost: 6230.75 ave 6255 max 6191 min +Histogram: 1 0 0 0 0 1 0 0 1 1 +Neighs: 276996 ave 280553 max 271385 min +Histogram: 1 0 0 0 0 1 0 0 0 2 + +Total # of neighbors = 1107985 +Ave neighs/atom = 369.32833 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:03 diff --git a/examples/reaxff/water/log.20Mar25.reaxff.water-qeqr.g++.1 b/examples/reaxff/water/log.20Mar25.reaxff.water-qeqr.g++.1 new file mode 100644 index 0000000000..9710c81bcb --- /dev/null +++ b/examples/reaxff/water/log.20Mar25.reaxff.water-qeqr.g++.1 @@ -0,0 
+1,116 @@ +LAMMPS (4 Feb 2025 - Development - patch_4Feb2025-444-gbb8b6590d5-modified) + using 1 OpenMP thread(s) per MPI task +# Water with QEqR + +boundary p p p +units real +atom_style charge + +read_data data.water +Reading data file ... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 3000 atoms + read_data CPU = 0.055 seconds + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z +replicate 1 $y $z +replicate 1 1 $z +replicate 1 1 1 +Replication is creating a 1x1x1 = 1 times larger system... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 1 by 1 MPI processor grid + 3000 atoms + replicate CPU = 0.001 seconds + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:300) +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +WARNING: Use fix qeq/reaxff instead of fix qeqr/reaxff when not using fix efield + (src/REAXFF/fix_qtpie_reaxff.cpp:493) +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.5 + ghost atom cutoff = 10.5 + binsize = 5.25, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/ghost/newtoff + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeqr/reaxff, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 539.2 | 539.2 | 539.2 Mbytes + Step Temp Press Density Volume + 0 300 780.33989 1 29915.273 + 10 301.29205 5433.7414 1 29915.273 + 20 297.90652 1572.6111 1 29915.273 +Loop time of 6.87447 on 1 procs for 20 steps with 3000 atoms + +Performance: 0.126 ns/day, 190.957 hours/ns, 2.909 timesteps/s, 8.728 katom-step/s +100.0% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 4.8461 | 4.8461 | 4.8461 | 0.0 | 70.49 +Neigh | 0.17595 | 0.17595 | 0.17595 | 0.0 | 2.56 +Comm | 0.001787 | 0.001787 | 0.001787 | 0.0 | 0.03 +Output | 8.5794e-05 | 8.5794e-05 | 8.5794e-05 | 0.0 | 0.00 +Modify | 1.8501 | 1.8501 | 1.8501 | 0.0 | 26.91 +Other | | 0.0004811 | | | 0.01 + +Nlocal: 3000 ave 3000 max 3000 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 11077 ave 11077 max 11077 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 971826 ave 971826 max 971826 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 971826 +Ave neighs/atom = 323.942 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:07 diff --git a/examples/reaxff/water/log.20Mar25.reaxff.water-qeqr.g++.4 b/examples/reaxff/water/log.20Mar25.reaxff.water-qeqr.g++.4 new file mode 100644 index 0000000000..e6182bf53a --- /dev/null +++ b/examples/reaxff/water/log.20Mar25.reaxff.water-qeqr.g++.4 @@ 
-0,0 +1,116 @@ +LAMMPS (4 Feb 2025 - Development - patch_4Feb2025-444-gbb8b6590d5-modified) + using 1 OpenMP thread(s) per MPI task +# Water with QEqR + +boundary p p p +units real +atom_style charge + +read_data data.water +Reading data file ... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 3000 atoms + read_data CPU = 0.082 seconds + +variable x index 1 +variable y index 1 +variable z index 1 + +replicate $x $y $z +replicate 1 $y $z +replicate 1 1 $z +replicate 1 1 1 +Replication is creating a 1x1x1 = 1 times larger system... + orthogonal box = (0 0 0) to (31.043046 31.043046 31.043046) + 1 by 2 by 2 MPI processor grid + 3000 atoms + replicate CPU = 0.002 seconds + +pair_style reaxff NULL safezone 3.0 mincap 150 +pair_coeff * * qeq_ff.water O H +WARNING: Changed valency_val to valency_boc for X (src/REAXFF/reaxff_ffield.cpp:300) +neighbor 0.5 bin +neigh_modify every 1 delay 0 check yes + +velocity all create 300.0 4928459 rot yes dist gaussian + +fix 1 all qeqr/reaxff 1 0.0 10.0 1.0e-6 reaxff gauss_exp.txt +fix 2 all nvt temp 300 300 50.0 + +timestep 0.5 +thermo 10 +thermo_style custom step temp press density vol + +run 20 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- pair reaxff command: doi:10.1016/j.parco.2011.08.005 + +@Article{Aktulga12, + author = {H. M. Aktulga and J. C. Fogarty and S. A. Pandit and A. Y. Grama}, + title = {Parallel Reactive Molecular Dynamics: {N}umerical Methods and Algorithmic Techniques}, + journal = {Parallel Computing}, + year = 2012, + volume = 38, + number = {4--5}, + pages = {245--259} +} + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +WARNING: Use fix qeq/reaxff instead of fix qeqr/reaxff when not using fix efield + (src/REAXFF/fix_qtpie_reaxff.cpp:493) +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.5 + ghost atom cutoff = 10.5 + binsize = 5.25, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair reaxff, perpetual + attributes: half, newton off, ghost + pair build: half/bin/ghost/newtoff + stencil: full/ghost/bin/3d + bin: standard + (2) fix qeqr/reaxff, perpetual, copy from (1) + attributes: half, newton off + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 260.5 | 262.2 | 263.6 Mbytes + Step Temp Press Density Volume + 0 300 780.34006 1 29915.273 + 10 301.29205 5433.7414 1 29915.273 + 20 297.90652 1572.6112 1 29915.273 +Loop time of 2.52349 on 4 procs for 20 steps with 3000 atoms + +Performance: 0.342 ns/day, 70.097 hours/ns, 7.926 timesteps/s, 23.777 katom-step/s +99.0% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.7081 | 1.7518 | 1.7812 | 2.3 | 69.42 +Neigh | 0.10017 | 0.10116 | 0.10315 | 0.4 | 4.01 +Comm | 0.014848 | 0.044256 | 0.087941 | 14.7 | 1.75 +Output | 5.1199e-05 | 5.663e-05 | 7.1837e-05 | 0.0 | 0.00 +Modify | 0.62379 | 0.62575 | 0.62671 | 0.1 | 24.80 +Other | | 0.000504 | | | 0.02 + +Nlocal: 750 ave 759 max 735 min +Histogram: 1 0 0 0 0 1 0 0 0 2 +Nghost: 6230.5 ave 6256 max 6190 min +Histogram: 1 0 0 0 0 1 0 0 1 1 +Neighs: 277008 ave 280943 max 271394 min +Histogram: 1 0 0 0 0 1 0 0 1 1 + +Total # of neighbors = 1108032 +Ave neighs/atom = 369.344 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:02 diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 84bbd03585..7d39bd36ae 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,72 @@ # CHANGELOG +## 4.6.00 + +[Full 
Changelog](https://github.com/kokkos/kokkos/compare/4.5.01...4.6.00) + +### Features: + +* Kokkos::Graph: Allow adding tasks to the graph via a `then`-node [\#7629](https://github.com/kokkos/kokkos/pull/7629) +* Kokkos::Graph: Allow construction from CUDA/HIP graph [\#7664](https://github.com/kokkos/kokkos/pull/7664) +* HIP: Add experimental support for using multiple GPUs from one process [\#7130](https://github.com/kokkos/kokkos/pull/7130) + +### Backend and Architecture Enhancements: + +#### CUDA: +* Improved reduction performance, in particular on H100 and newer [\#7823](https://github.com/kokkos/kokkos/pull/7823) + +#### HIP: +* Change block size deduction to prefer smaller blocks/teams [\#7509](https://github.com/kokkos/kokkos/pull/7509) +* Allocate memory with stream ordered semantics (i.e. use `hipMallocAsync`) [\#7659](https://github.com/kokkos/kokkos/pull/7659) +* Fix a segfault when a virtual function called inside a kernel requires too many registers[\#7660](https://github.com/kokkos/kokkos/pull/7660) + +#### SYCL: +* Improve sorting performance for non-contiguous views [\#7502](https://github.com/kokkos/kokkos/pull/7502) + +#### Serial: +* Reduce fences overhead when using `Kokkos_ENABLE_ATOMICS_BYPASS` [\#7821](https://github.com/kokkos/kokkos/pull/7821) + +### General Enhancements +* Allow use of `kokkos_check` in `Config.cmake` without warnings [\#7669](https://github.com/kokkos/kokkos/pull/7669) +* Add simd compound assignments and update simd reductions [\#7486](https://github.com/kokkos/kokkos/pull/7486) +* Improve performance of the `inclusive_scan` algorithm with Cuda and HIP [\#7542](https://github.com/kokkos/kokkos/pull/7542) +* Reduce tooling interface overhead (don't pay for what you don't use) [\#7817](https://github.com/kokkos/kokkos/pull/7817) +* Avoid storing the view in `RandomAccessIterator` to increase performance [\#7304](https://github.com/kokkos/kokkos/pull/7304) +* Make `RandomAccessIterator` fulfill `std::random_access_iterator 
concept` [\#7451](https://github.com/kokkos/kokkos/pull/7451) +* Include information about support for system allocated memory in `print_configuration` (Cuda and HIP) [\#7673](https://github.com/kokkos/kokkos/pull/7673) + +### Build System Changes +* Add support for Zen 4 AMD microarchitecture [\#7550](https://github.com/kokkos/kokkos/pull/7550) +* Enable NVIDIA Grace architecture with NVHPC [\#7858](https://github.com/kokkos/kokkos/pull/7858) +* Support static library builds when using CUDA as CMake language [\#7830](https://github.com/kokkos/kokkos/pull/7830) + +### Incompatibilities (i.e. breaking changes) +* Change SIMD comparison operator to return `simd_mask` instead of `bool` [\#7781](https://github.com/kokkos/kokkos/pull/7781) +* Remove classic Intel compiler (icpc) support [\#7737](https://github.com/kokkos/kokkos/pull/7737) +* Remove `operator[]` overloads of Kokkos `basic_simd` and `basic_simd_mask` that return a reference [\#7630](https://github.com/kokkos/kokkos/pull/7630) + +### Deprecations +* Deprecate `StaticCrsGraph` and move it to Kokkos Kernels into `KokkosSparse::` [\#7516](https://github.com/kokkos/kokkos/pull/7516) +* Deprecate `native_simd` and hide `simd_abi` [\#7472](https://github.com/kokkos/kokkos/pull/7472) +* Deprecate Makefile support [\#7613](https://github.com/kokkos/kokkos/pull/7613) +* DualView: Deprecate direct access to d_view and h_view [\#7716](https://github.com/kokkos/kokkos/pull/7716) + +### Bug Fixes +* Fix performance bug affecting `atomic_fetch_{add,sub,min,max,and,or,xor}` on integral types `long` and `unsigned long` with HIP [\#7816](https://github.com/kokkos/kokkos/pull/7816) +* Fix execution of ranges with more than 2B elements [\#7797](https://github.com/kokkos/kokkos/pull/7797) +* Fix clean target when embedding Kokkos in another project [\#7557](https://github.com/kokkos/kokkos/pull/7557) +* Fix Zen3 flag for NVHPC [\#7558](https://github.com/kokkos/kokkos/pull/7558) +* graph: nodes must be stored by the graph 
[\#7619](https://github.com/kokkos/kokkos/pull/7619) +* Make sure lock arrays are on device before launching a graph [\#7685](https://github.com/kokkos/kokkos/pull/7685) +* Performance bug in `RangePolicy`: construct error message if and only if the precondition is violated [\#7809](https://github.com/kokkos/kokkos/pull/7809) +* simd: fix a bug in scalar min/max [\#7813](https://github.com/kokkos/kokkos/pull/7813) +* simd: fix a bug in non-masked reductions [\#7845](https://github.com/kokkos/kokkos/pull/7845) +* Cuda: fix incorrect iteration in `MDRangePolicy` of rank > 4 for high iteration counts [\#7724](https://github.com/kokkos/kokkos/pull/7724) +* Cuda: ignore gcc assembler options in `nvcc-wrapper` [\#7492](https://github.com/kokkos/kokkos/pull/7492) +* Build system: hint to `ARCH_NATIVE` if ARMv9 Grace arch is not explicitly supported by the compiler [\#7862](https://github.com/kokkos/kokkos/pull/7862) +* Use right arch for MI300A in makefiles [\#7786](https://github.com/kokkos/kokkos/pull/7786) +* Fix compiling BasicView on MSVC [\#7751](https://github.com/kokkos/kokkos/pull/7751) + ## 4.5.01 [Full Changelog](https://github.com/kokkos/kokkos/compare/4.5.00...4.5.01) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 6a70bea149..7a4dc73444 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -148,8 +148,8 @@ elseif(NOT CMAKE_SIZEOF_VOID_P EQUAL 8) endif() set(Kokkos_VERSION_MAJOR 4) -set(Kokkos_VERSION_MINOR 5) -set(Kokkos_VERSION_PATCH 1) +set(Kokkos_VERSION_MINOR 6) +set(Kokkos_VERSION_PATCH 0) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") message(STATUS "Kokkos version: ${Kokkos_VERSION}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") diff --git a/lib/kokkos/CTestConfig.cmake b/lib/kokkos/CTestConfig.cmake new file mode 100644 index 0000000000..deb80ab76a --- /dev/null +++ 
b/lib/kokkos/CTestConfig.cmake @@ -0,0 +1,4 @@ +set(CTEST_PROJECT_NAME Kokkos) +set(CTEST_NIGHTLY_START_TIME 01:00:00 UTC) +set(CTEST_SUBMIT_URL https://my.cdash.org/submit.php?project=Kokkos) +set(CTEST_DROP_SITE_CDASH TRUE) diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index abdfb7a316..65c576bb8d 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -1,18 +1,26 @@ # Default settings common options. -#SPARTA specific settings: +#LAMMPS specific settings: + +KOKKOS_USE_DEPRECATED_MAKEFILES=1 + ifndef KOKKOS_PATH KOKKOS_PATH=../../lib/kokkos endif CXXFLAGS=$(CCFLAGS) ifeq ($(mode),shared) -CXXFLAGS += $(SHFLAGS) + CXXFLAGS += $(SHFLAGS) +endif + + +ifneq ($(KOKKOS_USE_DEPRECATED_MAKEFILES), 1) + $(error Makefile support is deprecated. Only CMake builds will be supported from Kokkos 5 on. Set KOKKOS_USE_DEPRECATED_MAKEFILES=1 to silence this error.) endif KOKKOS_VERSION_MAJOR = 4 -KOKKOS_VERSION_MINOR = 5 -KOKKOS_VERSION_PATCH = 1 +KOKKOS_VERSION_MINOR = 6 +KOKKOS_VERSION_PATCH = 0 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial @@ -24,7 +32,7 @@ KOKKOS_DEVICES ?= "OpenMP" # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace # IBM: Power8,Power9 # AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX942_APU,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103 -# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 +# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3,Zen4 # Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC KOKKOS_ARCH ?= "" # Options: yes,no @@ -442,11 +450,14 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ # AMD based. 
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX) +KOKKOS_INTERNAL_USE_ARCH_ZEN4 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen4) KOKKOS_INTERNAL_USE_ARCH_ZEN3 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen3) KOKKOS_INTERNAL_USE_ARCH_ZEN2 := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen2) -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0) - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 0) - KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN4), 0) + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0) + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN2), 0) + KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen) + endif endif endif @@ -463,8 +474,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A) endif KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940) -KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942_APU) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942_APU), 0) + KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942) +endif KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0) KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030) @@ -857,6 +870,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN4), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN4") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") + + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xCORE-AVX512 + KOKKOS_LDFLAGS += -xCORE-AVX512 + else + KOKKOS_CXXFLAGS += -march=znver4 -mtune=znver4 + 
KOKKOS_LDFLAGS += -march=znver4 -mtune=znver4 + endif +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV80") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV8_THUNDERX") diff --git a/lib/kokkos/README.md b/lib/kokkos/README.md index 56159b35c2..13d99c0bad 100644 --- a/lib/kokkos/README.md +++ b/lib/kokkos/README.md @@ -18,24 +18,24 @@ Kokkos is a [Linux Foundation](https://linuxfoundation.org) project. To start learning about Kokkos: -- [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities. +- [Kokkos Lectures](https://kokkos.org/kokkos-core-wiki/tutorials-and-examples/video-lectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important capabilities. - [Programming guide](https://kokkos.org/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. - [API reference](https://kokkos.org/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.org/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.org/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.org/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.org/kokkos-core-wiki/API/alphabetical.html). -- [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/usecases.html): a serie of examples ranging from how to use Kokkos with MPI to Fortran interoperability. +- [Use cases and Examples](https://kokkos.org/kokkos-core-wiki/tutorials-and-examples/use-cases-and-examples.html): a serie of examples ranging from how to use Kokkos with MPI to Fortran interoperability. 
## Obtaining Kokkos The latest release of Kokkos can be obtained from the [GitHub releases page](https://github.com/kokkos/kokkos/releases/latest). -The current release is [4.5.01](https://github.com/kokkos/kokkos/releases/tag/4.5.01). +The current release is [4.6.00](https://github.com/kokkos/kokkos/releases/tag/4.6.00). ```bash -curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz +curl -OJ -L https://github.com/kokkos/kokkos/releases/download/4.6.00/kokkos-4.6.00.tar.gz # Or with wget -wget https://github.com/kokkos/kokkos/releases/download/4.5.01/kokkos-4.5.01.tar.gz +wget https://github.com/kokkos/kokkos/releases/download/4.6.00/kokkos-4.6.00.tar.gz ``` To clone the latest development version of Kokkos from GitHub: @@ -47,7 +47,7 @@ git clone -b develop https://github.com/kokkos/kokkos.git ### Building Kokkos To build Kokkos, you will need to have a C++ compiler that supports C++17 or later. -All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/requirements.html). +All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.org/kokkos-core-wiki/get-started/requirements.html). Building and installation instructions are described [here](https://kokkos.org/kokkos-core-wiki/building.html). 
diff --git a/lib/kokkos/algorithms/CMakeLists.txt b/lib/kokkos/algorithms/CMakeLists.txt index 73ce9f7ec5..e257e4ccce 100644 --- a/lib/kokkos/algorithms/CMakeLists.txt +++ b/lib/kokkos/algorithms/CMakeLists.txt @@ -5,3 +5,7 @@ endif() if(NOT ((KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC) OR KOKKOS_ENABLE_OPENACC)) kokkos_add_test_directories(unit_tests) endif() + +if(Kokkos_ENABLE_BENCHMARKS) + add_subdirectory(perf_test) +endif() diff --git a/lib/kokkos/algorithms/perf_test/CMakeLists.txt b/lib/kokkos/algorithms/perf_test/CMakeLists.txt new file mode 100644 index 0000000000..a41d3f891b --- /dev/null +++ b/lib/kokkos/algorithms/perf_test/CMakeLists.txt @@ -0,0 +1,63 @@ +# FIXME: The following logic should be moved from here and also from `core/perf_test/CMakeLists.txt` to +# the root `CMakeLists.txt` in the form of a macro +# Find or download google/benchmark library +find_package(benchmark QUIET 1.5.6) +if(benchmark_FOUND) + message(STATUS "Using google benchmark found in ${benchmark_DIR}") +else() + message(STATUS "No installed google benchmark found, fetching from GitHub") + include(FetchContent) + set(BENCHMARK_ENABLE_TESTING OFF) + + list(APPEND CMAKE_MESSAGE_INDENT "[benchmark] ") + FetchContent_Declare( + googlebenchmark + DOWNLOAD_EXTRACT_TIMESTAMP FALSE + URL https://github.com/google/benchmark/archive/refs/tags/v1.7.1.tar.gz + URL_HASH MD5=0459a6c530df9851bee6504c3e37c2e7 + ) + FetchContent_MakeAvailable(googlebenchmark) + list(POP_BACK CMAKE_MESSAGE_INDENT) + + # Suppress clang-tidy diagnostics on code that we do not have control over + if(CMAKE_CXX_CLANG_TIDY) + set_target_properties(benchmark PROPERTIES CXX_CLANG_TIDY "") + endif() + + # FIXME: Check whether the following target_compile_options are needed. + # If so, clarify why. 
+ target_compile_options(benchmark PRIVATE -w) + target_compile_options(benchmark_main PRIVATE -w) +endif() + +# FIXME: This function should be moved from here and also from `core/perf_test/CMakeLists.txt` to +# the root `CMakeLists.txt` +# FIXME: Could NAME be a one_value_keyword specified in cmake_parse_arguments? +function(KOKKOS_ADD_BENCHMARK NAME) + cmake_parse_arguments(BENCHMARK "" "" "SOURCES" ${ARGN}) + if(DEFINED BENCHMARK_UNPARSED_ARGUMENTS) + message(WARNING "Unexpected arguments when adding a benchmark: " ${BENCHMARK_UNPARSED_ARGUMENTS}) + endif() + + set(BENCHMARK_NAME Kokkos_${NAME}) + # FIXME: BenchmarkMain.cpp and Benchmark_Context.cpp should be moved to a common location from which + # they can be used by all performance tests. + list(APPEND BENCHMARK_SOURCES ../../core/perf_test/BenchmarkMain.cpp ../../core/perf_test/Benchmark_Context.cpp) + + add_executable(${BENCHMARK_NAME} ${BENCHMARK_SOURCES}) + target_link_libraries(${BENCHMARK_NAME} PRIVATE benchmark::benchmark Kokkos::kokkos impl_git_version) + target_include_directories(${BENCHMARK_NAME} SYSTEM PRIVATE ${benchmark_SOURCE_DIR}/include) + + # FIXME: This alone will not work. It might need an architecture and standard which need to be defined on target level. + # It will potentially go away with #7582. 
+ foreach(SOURCE_FILE ${BENCHMARK_SOURCES}) + set_source_files_properties(${SOURCE_FILE} PROPERTIES LANGUAGE ${KOKKOS_COMPILE_LANGUAGE}) + endforeach() + + string(TIMESTAMP BENCHMARK_TIME "%Y-%m-%d_T%H-%M-%S" UTC) + set(BENCHMARK_ARGS --benchmark_counters_tabular=true --benchmark_out=${BENCHMARK_NAME}_${BENCHMARK_TIME}.json) + + add_test(NAME ${BENCHMARK_NAME} COMMAND ${BENCHMARK_NAME} ${BENCHMARK_ARGS}) +endfunction() + +kokkos_add_benchmark(PerformanceTest_InclusiveScan SOURCES test_inclusive_scan.cpp) diff --git a/lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp b/lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp new file mode 100644 index 0000000000..a0a5de6b07 --- /dev/null +++ b/lib/kokkos/algorithms/perf_test/test_inclusive_scan.cpp @@ -0,0 +1,191 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include +#include + +#include + +#include +#include +#include +// FIXME: Benchmark_Context.hpp should be moved to a common location +#include "../../core/perf_test/Benchmark_Context.hpp" + +namespace { + +namespace KE = Kokkos::Experimental; + +using ExecSpace = Kokkos::DefaultExecutionSpace; +using HostExecSpace = Kokkos::DefaultHostExecutionSpace; + +// A tag struct to identify when inclusive scan with the implicit sum +// based binary operation needs to be called. 
+template +struct ImpSumBinOp; + +template +struct SumFunctor { + KOKKOS_FUNCTION + ValueType operator()(const ValueType& a, const ValueType& b) const { + return (a + b); + } +}; + +template +struct MaxFunctor { + KOKKOS_FUNCTION + ValueType operator()(const ValueType& a, const ValueType& b) const { + if (a > b) + return a; + else + return b; + } +}; + +// Helper to obtain last element of a view +template +T obtain_last_elem(const Kokkos::View& v) { + T last_element; + Kokkos::deep_copy(last_element, Kokkos::subview(v, v.extent(0) - 1)); + return last_element; +} + +// Helper to allocate input and output views +template +auto prepare_views(const std::size_t kProbSize) { + Kokkos::View in{"input", kProbSize}; + Kokkos::View out{"output", kProbSize}; + + auto h_in = Kokkos::create_mirror_view(in); + + for (std::size_t i = 0; i < kProbSize; ++i) { + h_in(i) = i; + } + + Kokkos::deep_copy(in, h_in); + + return std::make_tuple(in, out, h_in); +} + +// Perform scan with a reference implementation +template > +T ref_scan(const ViewType& h_in, ScanFunctor scan_functor = ScanFunctor()) { + std::size_t view_size = h_in.extent(0); + + Kokkos::View h_out("output", view_size); + + // FIXME: We have GCC 8.4.0 based check in our ORNL Jenkins CI. + // std::inclusive_scan is available only from GCC 9.3. Since, GCC 9.1 + // std::inclusive_scan that takes execution policy is available. However, + // there is error with header before GCC 10.1. 
+ h_out(0) = h_in(0); + + for (std::size_t i = 1; i < view_size; ++i) { + h_out(i) = scan_functor(h_in(i), h_out(i - 1)); + } + + return h_out(view_size - 1); +} + +// Inclusive Scan with default binary operation (sum) or user provided functor +// Note: The nature of the functor must be compatible with the +// elements in the input and output views +template class ScanFunctor = ImpSumBinOp> +auto inclusive_scan(const Kokkos::View& in, + const Kokkos::View& out, T res_check) { + ExecSpace().fence(); + Kokkos::Timer timer; + + if constexpr (std::is_same_v, ImpSumBinOp>) { + KE::inclusive_scan("Default scan", ExecSpace(), KE::cbegin(in), + KE::cend(in), KE::begin(out)); + } else { + KE::inclusive_scan("Scan using a functor", ExecSpace(), KE::cbegin(in), + KE::cend(in), KE::begin(out), ScanFunctor()); + } + + ExecSpace().fence(); + double time_scan = timer.seconds(); + + T res_scan = obtain_last_elem(out); + bool passed = (res_check == res_scan); + + return std::make_tuple(time_scan, passed); +} + +// Benchmark: Inclusive Scan with default binary operation (sum) +// or user provided functor +template class ScanFunctor = ImpSumBinOp> +void BM_inclusive_scan(benchmark::State& state) { + const std::size_t kProbSize = state.range(0); + + auto [in, out, h_in] = prepare_views(kProbSize); + + T res_check; + + if constexpr (std::is_same_v, ImpSumBinOp>) { + res_check = ref_scan(h_in); + } else { + res_check = ref_scan(h_in, ScanFunctor()); + } + + double time_scan = 0.; + bool passed = false; + + for (auto _ : state) { + if constexpr (std::is_same_v, ImpSumBinOp>) { + std::tie(time_scan, passed) = inclusive_scan(in, out, res_check); + } else { + std::tie(time_scan, passed) = + inclusive_scan(in, out, res_check); + } + + KokkosBenchmark::report_results(state, in, 2, time_scan); + state.counters["Passed"] = passed; + } +} + +constexpr std::size_t PROB_SIZE = 100'000'000; + +} // anonymous namespace + +// FIXME: Add logic to pass min. warm-up time. 
Also, the value should be set +// by the user. Say, via the environment variable BENCHMARK_MIN_WARMUP_TIME. + +BENCHMARK(BM_inclusive_scan)->Arg(PROB_SIZE)->UseManualTime(); +BENCHMARK(BM_inclusive_scan)->Arg(PROB_SIZE)->UseManualTime(); +BENCHMARK(BM_inclusive_scan)->Arg(PROB_SIZE)->UseManualTime(); +BENCHMARK(BM_inclusive_scan) + ->Arg(PROB_SIZE) + ->UseManualTime(); +BENCHMARK(BM_inclusive_scan) + ->Arg(PROB_SIZE) + ->UseManualTime(); +BENCHMARK(BM_inclusive_scan) + ->Arg(PROB_SIZE) + ->UseManualTime(); +BENCHMARK(BM_inclusive_scan) + ->Arg(PROB_SIZE) + ->UseManualTime(); +BENCHMARK(BM_inclusive_scan) + ->Arg(PROB_SIZE) + ->UseManualTime(); +BENCHMARK(BM_inclusive_scan) + ->Arg(PROB_SIZE) + ->UseManualTime(); diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index b28ea4c2ca..54a853fa55 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -587,11 +587,13 @@ struct Random_XorShift1024_State { int state_idx) : state_(&v(state_idx, 0)), stride_(v.stride_1()) {} + // NOLINTBEGIN(bugprone-implicit-widening-of-multiplication-result) KOKKOS_FUNCTION uint64_t operator[](const int i) const { return state_[i * stride_]; } KOKKOS_FUNCTION uint64_t& operator[](const int i) { return state_[i * stride_]; } + // NOLINTEND(bugprone-implicit-widening-of-multiplication-result) }; template @@ -670,7 +672,12 @@ struct Random_UniqueIndex> { View>; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type& locks_) { +#if defined(KOKKOS_COMPILER_INTEL_LLVM) && \ + KOKKOS_COMPILER_INTEL_LLVM >= 20250000 + auto item = sycl::ext::oneapi::this_work_item::get_nd_item<3>(); +#else auto item = sycl::ext::oneapi::experimental::this_nd_item<3>(); +#endif std::size_t threadIdx[3] = {item.get_local_id(2), item.get_local_id(1), item.get_local_id(0)}; std::size_t blockIdx[3] = {item.get_group(2), item.get_group(1), diff --git 
a/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp index 8e7de32a07..b093b72ad6 100644 --- a/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_BinOpsPublicAPI.hpp @@ -45,7 +45,7 @@ struct BinOp1D { // For integral types the number of bins may be larger than the range // in which case we can exactly have one unique value per bin // and then don't need to sort bins. - if (std::is_integral::value && + if (std::is_integral_v && (static_cast(max) - static_cast(min)) <= static_cast(max_bins)) { mul_ = 1.; diff --git a/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp b/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp index 20026c77e4..308e9e3a00 100644 --- a/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp +++ b/lib/kokkos/algorithms/src/sorting/Kokkos_SortPublicAPI.hpp @@ -53,13 +53,9 @@ void sort(const ExecutionSpace& exec, if constexpr (Impl::better_off_calling_std_sort_v) { exec.fence("Kokkos::sort without comparator use std::sort"); - if (view.span_is_contiguous()) { - std::sort(view.data(), view.data() + view.size()); - } else { - auto first = ::Kokkos::Experimental::begin(view); - auto last = ::Kokkos::Experimental::end(view); - std::sort(first, last); - } + auto first = ::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + std::sort(first, last); } else { Impl::sort_device_view_without_comparator(exec, view); } @@ -111,13 +107,9 @@ void sort(const ExecutionSpace& exec, if constexpr (Impl::better_off_calling_std_sort_v) { exec.fence("Kokkos::sort with comparator use std::sort"); - if (view.span_is_contiguous()) { - std::sort(view.data(), view.data() + view.size(), comparator); - } else { - auto first = ::Kokkos::Experimental::begin(view); - auto last = ::Kokkos::Experimental::end(view); - std::sort(first, last, comparator); - } + auto first = 
::Kokkos::Experimental::begin(view); + auto last = ::Kokkos::Experimental::end(view); + std::sort(first, last, comparator); } else { Impl::sort_device_view_with_comparator(exec, view, comparator); } diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp index 2a8f761d9b..f17d254b0b 100644 --- a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp @@ -47,6 +47,7 @@ #ifdef _CubLog #undef _CubLog #endif +// NOLINTNEXTLINE(bugprone-reserved-identifier) #define _CubLog #include #include @@ -65,12 +66,24 @@ #include #endif -#if defined(KOKKOS_ENABLE_ONEDPL) && \ - (ONEDPL_VERSION_MAJOR > 2022 || \ - (ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2)) -#define KOKKOS_ONEDPL_HAS_SORT_BY_KEY +#ifdef KOKKOS_ENABLE_ONEDPL +#define KOKKOS_IMPL_ONEDPL_VERSION \ + ONEDPL_VERSION_MAJOR * 10000 + ONEDPL_VERSION_MINOR * 100 + \ + ONEDPL_VERSION_PATCH +#define KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(MAJOR, MINOR, PATCH) \ + (KOKKOS_IMPL_ONEDPL_VERSION >= ((MAJOR)*10000 + (MINOR)*100 + (PATCH))) + +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 2, 0) +#define KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wunused-local-typedef" +#pragma GCC diagnostic ignored "-Wunused-parameter" +#pragma GCC diagnostic ignored "-Wunused-variable" #include #include +#pragma GCC diagnostic pop +#endif #endif namespace Kokkos::Impl { @@ -141,12 +154,18 @@ void sort_by_key_rocthrust( #endif #if defined(KOKKOS_ENABLE_ONEDPL) + +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) +template +inline constexpr bool sort_on_device_v = true; +#else template inline constexpr bool sort_on_device_v = std::is_same_v || std::is_same_v; +#endif -#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY +#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY template void 
sort_by_key_onedpl( @@ -154,6 +173,14 @@ void sort_by_key_onedpl( const Kokkos::View& keys, const Kokkos::View& values, MaybeComparator&&... maybeComparator) { + auto queue = exec.sycl_queue(); + auto policy = oneapi::dpl::execution::make_device_policy(queue); +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + oneapi::dpl::sort_by_key(policy, ::Kokkos::Experimental::begin(keys), + ::Kokkos::Experimental::end(keys), + ::Kokkos::Experimental::begin(values), + std::forward(maybeComparator)...); +#else if (keys.stride(0) != 1 && values.stride(0) != 1) { Kokkos::abort( "SYCL sort_by_key only supports rank-1 Views with stride(0) = 1."); @@ -161,11 +188,10 @@ void sort_by_key_onedpl( // Can't use Experimental::begin/end here since the oneDPL then assumes that // the data is on the host. - auto queue = exec.sycl_queue(); - auto policy = oneapi::dpl::execution::make_device_policy(queue); const int n = keys.extent(0); oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(), std::forward(maybeComparator)...); +#endif } #endif #endif @@ -336,12 +362,18 @@ void sort_by_key_device_view_without_comparator( const Kokkos::SYCL& exec, const Kokkos::View& keys, const Kokkos::View& values) { -#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY +#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + sort_by_key_onedpl(exec, keys, values); +#else if (keys.stride(0) == 1 && values.stride(0) == 1) sort_by_key_onedpl(exec, keys, values); else -#endif sort_by_key_via_sort(exec, keys, values); +#endif +#else + sort_by_key_via_sort(exec, keys, values); +#endif } #endif @@ -394,12 +426,18 @@ void sort_by_key_device_view_with_comparator( const Kokkos::View& keys, const Kokkos::View& values, const ComparatorType& comparator) { -#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY +#ifdef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + sort_by_key_onedpl(exec, keys, values, comparator); +#else if 
(keys.stride(0) == 1 && values.stride(0) == 1) sort_by_key_onedpl(exec, keys, values, comparator); else -#endif sort_by_key_via_sort(exec, keys, values, comparator); +#endif +#else + sort_by_key_via_sort(exec, keys, values, comparator); +#endif } #endif @@ -416,7 +454,9 @@ sort_by_key_device_view_with_comparator( sort_by_key_via_sort(exec, keys, values, comparator); } -#undef KOKKOS_ONEDPL_HAS_SORT_BY_KEY +#undef KOKKOS_IMPL_ONEDPL_HAS_SORT_BY_KEY } // namespace Kokkos::Impl +#undef KOKKOS_IMPL_ONEDPL_VERSION +#undef KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL #endif diff --git a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp index 734ce450f6..fa7c28b4d0 100644 --- a/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp +++ b/lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortImpl.hpp @@ -51,6 +51,7 @@ #ifdef _CubLog #undef _CubLog #endif +// NOLINTNEXTLINE(bugprone-reserved-identifier) #define _CubLog #include #include @@ -70,8 +71,20 @@ #endif #if defined(KOKKOS_ENABLE_ONEDPL) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wunused-local-typedef" +#pragma GCC diagnostic ignored "-Wunused-parameter" +#pragma GCC diagnostic ignored "-Wunused-variable" #include #include +#pragma GCC diagnostic pop + +#define KOKKOS_IMPL_ONEDPL_VERSION \ + ONEDPL_VERSION_MAJOR * 10000 + ONEDPL_VERSION_MINOR * 100 + \ + ONEDPL_VERSION_PATCH +#define KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(MAJOR, MINOR, PATCH) \ + (KOKKOS_IMPL_ONEDPL_VERSION >= ((MAJOR)*10000 + (MINOR)*100 + (PATCH))) #endif namespace Kokkos { @@ -221,6 +234,10 @@ void sort_onedpl(const Kokkos::SYCL& space, "SYCL execution space is not able to access the memory space " "of the View argument!"); +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + static_assert(ViewType::rank == 1, + "Kokkos::sort currently only supports rank-1 Views."); +#else static_assert( (ViewType::rank == 1) 
&& (std::is_same_v || @@ -234,18 +251,26 @@ void sort_onedpl(const Kokkos::SYCL& space, if (view.stride(0) != 1) { Kokkos::abort("SYCL sort only supports rank-1 Views with stride(0) = 1."); } +#endif if (view.extent(0) <= 1) { return; } - // Can't use Experimental::begin/end here since the oneDPL then assumes that - // the data is on the host. auto queue = space.sycl_queue(); auto policy = oneapi::dpl::execution::make_device_policy(queue); + +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + oneapi::dpl::sort(policy, ::Kokkos::Experimental::begin(view), + ::Kokkos::Experimental::end(view), + std::forward(maybeComparator)...); +#else + // Can't use Experimental::begin/end here since the oneDPL then assumes that + // the data is on the host. const int n = view.extent(0); oneapi::dpl::sort(policy, view.data(), view.data() + n, std::forward(maybeComparator)...); +#endif } #endif @@ -269,29 +294,19 @@ void copy_to_host_run_stdsort_copy_back( KE::copy(exec, view, view_dc); // run sort on the mirror of view_dc - auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); - if (view.span_is_contiguous()) { - std::sort(mv_h.data(), mv_h.data() + mv_h.size(), - std::forward(maybeComparator)...); - } else { - auto first = KE::begin(mv_h); - auto last = KE::end(mv_h); - std::sort(first, last, std::forward(maybeComparator)...); - } + auto mv_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view_dc); + auto first = KE::begin(mv_h); + auto last = KE::end(mv_h); + std::sort(first, last, std::forward(maybeComparator)...); Kokkos::deep_copy(exec, view_dc, mv_h); // copy back to argument view KE::copy(exec, KE::cbegin(view_dc), KE::cend(view_dc), KE::begin(view)); } else { auto view_h = create_mirror_view_and_copy(Kokkos::HostSpace(), view); - if (view.span_is_contiguous()) { - std::sort(view_h.data(), view_h.data() + view_h.size(), - std::forward(maybeComparator)...); - } else { - auto first = KE::begin(view_h); - auto last = KE::end(view_h); - 
std::sort(first, last, std::forward(maybeComparator)...); - } + auto first = KE::begin(view_h); + auto last = KE::end(view_h); + std::sort(first, last, std::forward(maybeComparator)...); Kokkos::deep_copy(exec, view, view_h); } } @@ -332,11 +347,15 @@ void sort_device_view_without_comparator( "sort_device_view_without_comparator: supports rank-1 Views " "with LayoutLeft, LayoutRight or LayoutStride"); +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + sort_onedpl(exec, view); +#else if (view.stride(0) == 1) { sort_onedpl(exec, view); } else { copy_to_host_run_stdsort_copy_back(exec, view); } +#endif } #endif @@ -387,11 +406,15 @@ void sort_device_view_with_comparator( "sort_device_view_with_comparator: supports rank-1 Views " "with LayoutLeft, LayoutRight or LayoutStride"); +#if KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL(2022, 7, 1) + sort_onedpl(exec, view, comparator); +#else if (view.stride(0) == 1) { sort_onedpl(exec, view, comparator); } else { copy_to_host_run_stdsort_copy_back(exec, view, comparator); } +#endif } #endif @@ -423,4 +446,7 @@ sort_device_view_with_comparator( } // namespace Impl } // namespace Kokkos + +#undef KOKKOS_IMPL_ONEDPL_VERSION +#undef KOKKOS_IMPL_ONEDPL_VERSION_GREATER_EQUAL #endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp index da16141f5a..2e73ace8d5 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Constraints.hpp @@ -238,12 +238,9 @@ KOKKOS_INLINE_FUNCTION void expect_no_overlap( [[maybe_unused]] IteratorType2 s_first) { if constexpr (is_kokkos_iterator_v && is_kokkos_iterator_v) { - auto const view1 = first.view(); - auto const view2 = s_first.view(); - - std::size_t stride1 = view1.stride(0); - std::size_t stride2 = view2.stride(0); - ptrdiff_t first_diff = view1.data() - view2.data(); + std::size_t stride1 = first.stride(); 
+ std::size_t stride2 = s_first.stride(); + ptrdiff_t first_diff = first.data() - s_first.data(); // FIXME If strides are not identical, checks may not be made // with the cost of O(1) @@ -251,8 +248,8 @@ KOKKOS_INLINE_FUNCTION void expect_no_overlap( // If first_diff == 0, there is already an overlap if (stride1 == stride2 || first_diff == 0) { [[maybe_unused]] bool is_no_overlap = (first_diff % stride1); - auto* first_pointer1 = view1.data(); - auto* first_pointer2 = view2.data(); + auto* first_pointer1 = first.data(); + auto* first_pointer2 = s_first.data(); [[maybe_unused]] auto* last_pointer1 = first_pointer1 + (last - first); [[maybe_unused]] auto* last_pointer2 = first_pointer2 + (last - first); KOKKOS_EXPECTS(first_pointer1 >= last_pointer2 || diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp index ad7b8bb8ca..ef39be6366 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_CopyIf.hpp @@ -150,9 +150,8 @@ KOKKOS_FUNCTION OutputIterator copy_if_team_impl( return d_first + count; } -#if defined KOKKOS_COMPILER_INTEL || \ - (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ - !defined(KOKKOS_COMPILER_MSVC)) +#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC) __builtin_unreachable(); #endif } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp index 6da992b4bb..08e04810f6 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_ExclusiveScan.hpp @@ -103,7 +103,7 @@ OutputIteratorType exclusive_scan_custom_op_exespace_impl( // aliases using index_type = typename InputIteratorType::difference_type; - using unary_op_type = 
StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = TransformExclusiveScanFunctorWithValueWrapper< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -177,7 +177,7 @@ KOKKOS_FUNCTION OutputIteratorType exclusive_scan_custom_op_team_impl( // aliases using exe_space = typename TeamHandleType::execution_space; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using index_type = typename InputIteratorType::difference_type; using func_type = TransformExclusiveScanFunctorWithoutValueWrapper< exe_space, index_type, ValueType, InputIteratorType, OutputIteratorType, diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp index 252511c5d0..928508fdfb 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_IdentityReferenceUnaryFunctor.hpp @@ -23,10 +23,11 @@ namespace Kokkos { namespace Experimental { namespace Impl { -template struct StdNumericScanIdentityReferenceUnaryFunctor { - KOKKOS_FUNCTION - constexpr const ValueType& operator()(const ValueType& a) const { return a; } + template + KOKKOS_FUNCTION constexpr T&& operator()(T&& t) const { + return static_cast(t); + } }; } // namespace Impl diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp index 0b4acec0fe..867d0b0266 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_InclusiveScan.hpp @@ -18,12 +18,60 @@ #define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_IMPL_HPP #include 
+#include #include "Kokkos_Constraints.hpp" #include "Kokkos_HelperPredicates.hpp" #include #include #include +#if defined(KOKKOS_ENABLE_CUDA) + +// Workaround for `Instruction 'shfl' without '.sync' is not supported on +// .target sm_70 and higher from PTX ISA version 6.4`. +// Also see https://github.com/NVIDIA/cub/pull/170. +#if !defined(CUB_USE_COOPERATIVE_GROUPS) +#define CUB_USE_COOPERATIVE_GROUPS +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wsuggest-override" + +#if defined(KOKKOS_COMPILER_CLANG) +// Some versions of Clang fail to compile Thrust, failing with errors like +// this: +// /thrust/system/cuda/detail/core/agent_launcher.h:557:11: +// error: use of undeclared identifier 'va_printf' +// The exact combination of versions for Clang and Thrust (or CUDA) for this +// failure was not investigated, however even very recent version combination +// (Clang 10.0.0 and Cuda 10.0) demonstrated failure. +// +// Defining _CubLog here locally allows us to avoid that code path, however +// disabling some debugging diagnostics +#pragma push_macro("_CubLog") +#ifdef _CubLog +#undef _CubLog +#endif +// NOLINTNEXTLINE(bugprone-reserved-identifier) +#define _CubLog +#include +#include +#pragma pop_macro("_CubLog") +#else +#include +#include +#endif + +#pragma GCC diagnostic pop + +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +#include +#include +#endif + namespace Kokkos { namespace Experimental { namespace Impl { @@ -101,9 +149,48 @@ struct InclusiveScanDefaultFunctor { } }; -// -// exespace impl -// +// ------------------------------------------------------------- +// inclusive_scan_default_op_exespace_impl +// ------------------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +OutputIteratorType inclusive_scan_default_op_exespace_impl( + const std::string& label, const Cuda& ex, InputIteratorType first_from, + InputIteratorType last_from, 
OutputIteratorType first_dest) { + const auto thrust_ex = thrust::cuda::par.on(ex.cuda_stream()); + + Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan"); + + thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest); + + Kokkos::Profiling::popRegion(); + + const auto num_elements = thrust::distance(first_from, last_from); + + return first_dest + num_elements; +} +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +OutputIteratorType inclusive_scan_default_op_exespace_impl( + const std::string& label, const HIP& ex, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest) { + const auto thrust_ex = thrust::hip::par.on(ex.hip_stream()); + + Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan"); + + thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest); + + Kokkos::Profiling::popRegion(); + + const auto num_elements = thrust::distance(first_from, last_from); + + return first_dest + num_elements; +} +#endif + template OutputIteratorType inclusive_scan_default_op_exespace_impl( @@ -132,11 +219,16 @@ OutputIteratorType inclusive_scan_default_op_exespace_impl( // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); + + Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan"); + ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest)); ex.fence("Kokkos::inclusive_scan_default_op: fence after operation"); + Kokkos::Profiling::popRegion(); + // return return first_dest + num_elements; } @@ -144,6 +236,49 @@ OutputIteratorType inclusive_scan_default_op_exespace_impl( // ------------------------------------------------------------- // inclusive_scan_custom_binary_op_impl // ------------------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +template +OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( + const std::string& label, const Cuda& ex, 
InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOpType binary_op) { + const auto thrust_ex = thrust::cuda::par.on(ex.cuda_stream()); + + Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan"); + + thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest, + binary_op); + + Kokkos::Profiling::popRegion(); + + const auto num_elements = thrust::distance(first_from, last_from); + + return first_dest + num_elements; +} +#endif + +#if defined(KOKKOS_ENABLE_ROCTHRUST) +template +OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( + const std::string& label, const HIP& ex, InputIteratorType first_from, + InputIteratorType last_from, OutputIteratorType first_dest, + BinaryOpType binary_op) { + const auto thrust_ex = thrust::hip::par.on(ex.hip_stream()); + + Kokkos::Profiling::pushRegion(label + " via thrust::inclusive_scan"); + + thrust::inclusive_scan(thrust_ex, first_from, last_from, first_dest, + binary_op); + + Kokkos::Profiling::popRegion(); + + const auto num_elements = thrust::distance(first_from, last_from); + + return first_dest + num_elements; +} +#endif + template OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( @@ -160,7 +295,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( using index_type = typename InputIteratorType::difference_type; using value_type = std::remove_const_t; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = ExeSpaceTransformInclusiveScanNoInitValueFunctor< ExecutionSpace, index_type, value_type, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -168,11 +303,16 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); + + Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan"); + 
::Kokkos::parallel_scan( label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest, binary_op, unary_op_type())); ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); + Kokkos::Profiling::popRegion(); + // return return first_dest + num_elements; } @@ -195,7 +335,7 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( // aliases using index_type = typename InputIteratorType::difference_type; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = ExeSpaceTransformInclusiveScanWithInitValueFunctor< ExecutionSpace, index_type, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -203,12 +343,17 @@ OutputIteratorType inclusive_scan_custom_binary_op_exespace_impl( // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); + + Kokkos::Profiling::pushRegion(label + " via Kokkos::parallel_scan"); + ::Kokkos::parallel_scan(label, RangePolicy(ex, 0, num_elements), func_type(first_from, first_dest, binary_op, unary_op_type(), std::move(init_value))); ex.fence("Kokkos::inclusive_scan_custom_binary_op: fence after operation"); + Kokkos::Profiling::popRegion(); + // return return first_dest + num_elements; } @@ -283,7 +428,7 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( // aliases using exe_space = typename TeamHandleType::execution_space; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = TeamTransformInclusiveScanNoInitValueFunctor< exe_space, value_type, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; @@ -291,7 +436,6 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( // run const auto num_elements = Kokkos::Experimental::distance(first_from, last_from); - 
::Kokkos::parallel_scan( TeamThreadRange(teamHandle, 0, num_elements), func_type(first_from, first_dest, binary_op, unary_op_type())); @@ -325,7 +469,7 @@ KOKKOS_FUNCTION OutputIteratorType inclusive_scan_custom_binary_op_team_impl( // aliases using exe_space = typename TeamHandleType::execution_space; - using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = StdNumericScanIdentityReferenceUnaryFunctor; using func_type = TeamTransformInclusiveScanWithInitValueFunctor< exe_space, ValueType, InputIteratorType, OutputIteratorType, BinaryOpType, unary_op_type>; diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp index e8c638c94c..c504673c3d 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_RandomAccessIterator.hpp @@ -18,6 +18,7 @@ #define KOKKOS_RANDOM_ACCESS_ITERATOR_IMPL_HPP #include +#include // declval #include #include #include "Kokkos_Constraints.hpp" @@ -29,8 +30,29 @@ namespace Impl { template class RandomAccessIterator; +namespace { + +template +struct is_always_strided { + static_assert(is_view_v); + + constexpr static bool value = +#ifdef KOKKOS_ENABLE_IMPL_MDSPAN + decltype(std::declval().to_mdspan())::is_always_strided(); +#else + (std::is_same_v || + std::is_same_v || + std::is_same_v); +#endif +}; + +} // namespace + template -class RandomAccessIterator< ::Kokkos::View > { +class RandomAccessIterator<::Kokkos::View> { public: using view_type = ::Kokkos::View; using iterator_type = RandomAccessIterator; @@ -41,30 +63,31 @@ class RandomAccessIterator< ::Kokkos::View > { using pointer = typename view_type::pointer_type; using reference = typename view_type::reference_type; +// oneDPL needs this alias in order not to assume the data is on the host but on +// the device, see +// 
https://github.com/uxlfoundation/oneDPL/blob/a045eac689f9107f50ba7b42235e9e927118e483/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h#L210-L214 +#ifdef KOKKOS_ENABLE_ONEDPL + using is_passed_directly = std::true_type; +#endif + static_assert(view_type::rank == 1 && - (std::is_same_v || - std::is_same_v || - std::is_same_v), - "RandomAccessIterator only supports 1D Views with LayoutLeft, " - "LayoutRight, LayoutStride."); + is_always_strided<::Kokkos::View>::value); KOKKOS_DEFAULTED_FUNCTION RandomAccessIterator() = default; explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view) - : m_view(view) {} + : m_data(view.data()), m_stride(view.stride_0()) {} explicit KOKKOS_FUNCTION RandomAccessIterator(const view_type view, ptrdiff_t current_index) - : m_view(view), m_current_index(current_index) {} + : m_data(view.data() + current_index * view.stride_0()), + m_stride(view.stride_0()) {} #ifndef KOKKOS_ENABLE_CXX17 // C++20 and beyond template requires(std::is_constructible_v) KOKKOS_FUNCTION explicit(!std::is_convertible_v) RandomAccessIterator(const RandomAccessIterator& other) - : m_view(other.m_view), m_current_index(other.m_current_index) {} + : m_data(other.m_data), m_stride(other.m_stride) {} #else template < class OtherViewType, @@ -73,19 +96,22 @@ class RandomAccessIterator< ::Kokkos::View > { int> = 0> KOKKOS_FUNCTION explicit RandomAccessIterator( const RandomAccessIterator& other) - : m_view(other.m_view), m_current_index(other.m_current_index) {} + : m_data(other.m_data), m_stride(other.m_stride) {} template , int> = 0> KOKKOS_FUNCTION RandomAccessIterator( const RandomAccessIterator& other) - : m_view(other.m_view), m_current_index(other.m_current_index) {} + : m_data(other.m_data), m_stride(other.m_stride) {} #endif KOKKOS_FUNCTION iterator_type& operator++() { - ++m_current_index; + if constexpr (is_always_contiguous) + m_data++; + else + m_data += m_stride; return *this; } @@ -98,7 +124,10 @@ class RandomAccessIterator< 
::Kokkos::View > { KOKKOS_FUNCTION iterator_type& operator--() { - --m_current_index; + if constexpr (is_always_contiguous) + m_data--; + else + m_data -= m_stride; return *this; } @@ -111,77 +140,95 @@ class RandomAccessIterator< ::Kokkos::View > { KOKKOS_FUNCTION reference operator[](difference_type n) const { - return m_view(m_current_index + n); + if constexpr (is_always_contiguous) + return *(m_data + n); + else + return *(m_data + n * m_stride); } KOKKOS_FUNCTION iterator_type& operator+=(difference_type n) { - m_current_index += n; + if constexpr (is_always_contiguous) + m_data += n; + else + m_data += n * m_stride; return *this; } KOKKOS_FUNCTION iterator_type& operator-=(difference_type n) { - m_current_index -= n; + if constexpr (is_always_contiguous) + m_data -= n; + else + m_data -= n * m_stride; return *this; } KOKKOS_FUNCTION iterator_type operator+(difference_type n) const { - return iterator_type(m_view, m_current_index + n); + auto it = *this; + it += n; + return it; + } + + friend iterator_type operator+(difference_type n, iterator_type other) { + return other + n; } KOKKOS_FUNCTION iterator_type operator-(difference_type n) const { - return iterator_type(m_view, m_current_index - n); + auto it = *this; + it -= n; + return it; } KOKKOS_FUNCTION difference_type operator-(iterator_type it) const { - return m_current_index - it.m_current_index; + if constexpr (is_always_contiguous) + return m_data - it.m_data; + else + return (m_data - it.m_data) / m_stride; } KOKKOS_FUNCTION bool operator==(iterator_type other) const { - return m_current_index == other.m_current_index && - m_view.data() == other.m_view.data(); + return m_data == other.m_data && m_stride == other.m_stride; } KOKKOS_FUNCTION bool operator!=(iterator_type other) const { - return m_current_index != other.m_current_index || - m_view.data() != other.m_view.data(); + return m_data != other.m_data || m_stride != other.m_stride; } KOKKOS_FUNCTION - bool operator<(iterator_type other) const { 
- return m_current_index < other.m_current_index; - } + bool operator<(iterator_type other) const { return m_data < other.m_data; } KOKKOS_FUNCTION - bool operator<=(iterator_type other) const { - return m_current_index <= other.m_current_index; - } + bool operator<=(iterator_type other) const { return m_data <= other.m_data; } KOKKOS_FUNCTION - bool operator>(iterator_type other) const { - return m_current_index > other.m_current_index; - } + bool operator>(iterator_type other) const { return m_data > other.m_data; } KOKKOS_FUNCTION - bool operator>=(iterator_type other) const { - return m_current_index >= other.m_current_index; - } + bool operator>=(iterator_type other) const { return m_data >= other.m_data; } KOKKOS_FUNCTION - reference operator*() const { return m_view(m_current_index); } + reference operator*() const { return *m_data; } KOKKOS_FUNCTION - view_type view() const { return m_view; } + pointer data() const { return m_data; } + + KOKKOS_FUNCTION + int stride() const { return m_stride; } private: - view_type m_view; - ptrdiff_t m_current_index = 0; + pointer m_data; + int m_stride; + static constexpr bool is_always_contiguous = + (std::is_same_v || + std::is_same_v); // Needed for the converting constructor accepting another iterator template @@ -192,4 +239,10 @@ class RandomAccessIterator< ::Kokkos::View > { } // namespace Experimental } // namespace Kokkos +#ifdef KOKKOS_ENABLE_SYCL +template +struct sycl::is_device_copyable< + Kokkos::Experimental::Impl::RandomAccessIterator> : std::true_type {}; +#endif + #endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp index 2863582458..75f3315473 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_Unique.hpp @@ -52,13 +52,10 @@ struct StdUniqueFunctor { auto& val_i = m_first_from[i]; const auto& val_ip1 = m_first_from[i + 1]; - 
if (final_pass) { - if (!m_pred(val_i, val_ip1)) { + if (!m_pred(val_i, val_ip1)) { + if (final_pass) { m_first_dest[update] = std::move(val_i); } - } - - if (!m_pred(val_i, val_ip1)) { update += 1; } } @@ -188,6 +185,7 @@ KOKKOS_FUNCTION IteratorType unique_team_impl(const TeamHandleType& teamHandle, IteratorType result = first; IteratorType lfirst = first; while (++lfirst != last) { + // NOLINTNEXTLINE(bugprone-inc-dec-in-conditions) if (!pred(*result, *lfirst) && ++result != lfirst) { *result = std::move(*lfirst); } diff --git a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp index 710d04805d..226fd49d16 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/impl/Kokkos_UniqueCopy.hpp @@ -175,9 +175,8 @@ KOKKOS_FUNCTION OutputIterator unique_copy_team_impl( d_first + count); } -#if defined KOKKOS_COMPILER_INTEL || \ - (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ - !defined(KOKKOS_COMPILER_MSVC)) +#if defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC) __builtin_unreachable(); #endif } diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile index d3946c149b..eaf616c5d6 100644 --- a/lib/kokkos/algorithms/unit_tests/Makefile +++ b/lib/kokkos/algorithms/unit_tests/Makefile @@ -18,6 +18,8 @@ LINK ?= $(CXX) LDFLAGS ?= override LDFLAGS += -lpthread +KOKKOS_USE_DEPRECATED_MAKEFILES=1 + include $(KOKKOS_PATH)/Makefile.kokkos KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests -I${KOKKOS_PATH}/core/unit_test/category_files diff --git a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp index 6960b912d0..ed9c2610b6 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp @@ 
-281,7 +281,7 @@ struct test_random_scalar { double covariance_eps = result.covariance / num_draws / 2 / variance_expect; #if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT - if (!std::is_same::value) { + if (!std::is_same_v) { #endif EXPECT_LT(std::abs(mean_eps), tolerance); EXPECT_LT(std::abs(variance_eps), 1.5 * tolerance); @@ -312,7 +312,7 @@ struct test_random_scalar { (result.covariance / HIST_DIM1D - covariance_expect) / mean_expect; #if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT - if (std::is_same::value) { + if (std::is_same_v) { mean_eps_expect = 0.0003; variance_eps_expect = 1.0; covariance_eps_expect = 5.0e4; @@ -320,7 +320,7 @@ struct test_random_scalar { #endif #if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT - if (!std::is_same::value) { + if (!std::is_same_v) { #endif EXPECT_LT(std::abs(mean_eps), mean_eps_expect); EXPECT_LT(std::abs(variance_eps), variance_eps_expect); @@ -358,13 +358,13 @@ struct test_random_scalar { (result.covariance / HIST_DIM1D - covariance_expect) / mean_expect; #if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT - if (std::is_same::value) { + if (std::is_same_v) { variance_factor = 7; } #endif #if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT - if (!std::is_same::value) { + if (!std::is_same_v) { #endif EXPECT_LT(std::abs(mean_eps), tolerance); EXPECT_LT(std::abs(variance_eps), variance_factor); diff --git a/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp b/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp index 5ab348cb19..65e45ebb96 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandomAccessIterator.cpp @@ -37,12 +37,18 @@ struct random_access_iterator_test : std_algorithms_test { TEST_F(random_access_iterator_test, constructor) { // just tests that constructor works - auto it1 = KE::Impl::RandomAccessIterator(m_static_view); - auto it2 = 
KE::Impl::RandomAccessIterator(m_dynamic_view); - auto it3 = KE::Impl::RandomAccessIterator(m_strided_view); - auto it4 = KE::Impl::RandomAccessIterator(m_static_view, 3); - auto it5 = KE::Impl::RandomAccessIterator(m_dynamic_view, 3); - auto it6 = KE::Impl::RandomAccessIterator(m_strided_view, 3); + [[maybe_unused]] auto it1 = + KE::Impl::RandomAccessIterator(m_static_view); + [[maybe_unused]] auto it2 = + KE::Impl::RandomAccessIterator(m_dynamic_view); + [[maybe_unused]] auto it3 = + KE::Impl::RandomAccessIterator(m_strided_view); + [[maybe_unused]] auto it4 = + KE::Impl::RandomAccessIterator(m_static_view, 3); + [[maybe_unused]] auto it5 = + KE::Impl::RandomAccessIterator(m_dynamic_view, 3); + [[maybe_unused]] auto it6 = + KE::Impl::RandomAccessIterator(m_strided_view, 3); EXPECT_TRUE(true); } diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp index 5ea88ae5d6..562ff97e42 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -99,6 +99,7 @@ void test_dynamic_view_sort_impl(unsigned int n) { Kokkos::Experimental::DynamicView; using KeyViewType = Kokkos::View; + // NOLINTNEXTLINE(bugprone-implicit-widening-of-multiplication-result) const size_t upper_bound = 2 * n; const size_t min_chunk_size = 1024; diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp index dadce2d474..d8a68f768a 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCommon.hpp @@ -198,9 +198,8 @@ auto create_deep_copyable_compatible_view_with_same_extent(ViewType view) { // this is needed for intel to avoid // error #1011: missing return statement at end of non-void function -#if defined KOKKOS_COMPILER_INTEL || \ - (defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ - !defined(KOKKOS_COMPILER_MSVC)) +#if 
defined(KOKKOS_COMPILER_NVCC) && KOKKOS_COMPILER_NVCC >= 1130 && \ + !defined(KOKKOS_COMPILER_MSVC) __builtin_unreachable(); #endif } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp index 9324db12f2..ddb7dc2a68 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsCompileOnly.cpp @@ -507,6 +507,20 @@ struct TestStruct { } }; +#ifndef KOKKOS_ENABLE_CXX17 +template +constexpr bool +test_kokkos_iterator_satify_std_random_access_iterator_concept() { + return std::random_access_iterator< + Kokkos::Experimental::Impl::RandomAccessIterator>; +} + +static_assert(test_kokkos_iterator_satify_std_random_access_iterator_concept< + Kokkos::View>()); +static_assert(test_kokkos_iterator_satify_std_random_access_iterator_concept< + Kokkos::View>()); +#endif + } // namespace compileonly } // namespace stdalgos } // namespace Test diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp index 923ea970f9..67d21dd740 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsConstraints.cpp @@ -173,6 +173,7 @@ TEST(std_algorithms_DeathTest, expect_no_overlap) { KE::Impl::expect_no_overlap(sub_first_d0, sub_last_d0, sub_first_d1); + // NOLINTNEXTLINE(bugprone-implicit-widening-of-multiplication-result) Kokkos::LayoutStride layout2d{2, 3, extent0, 2 * 3}; Kokkos::View strided_view_2d{ "std-algo-test-2d-contiguous-view-strided", layout2d}; diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp index a85e63fe34..1a81991c35 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp +++ 
b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsExclusiveScan.cpp @@ -171,7 +171,7 @@ struct VerifyData { create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc); if (test_view_h.extent(0) > 0) { for (std::size_t i = 0; i < test_view_h.extent(0); ++i) { - if (std::is_same::value) { + if (std::is_same_v) { ASSERT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp index b4f40b4651..c8ecc137e2 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsInclusiveScan.cpp @@ -184,7 +184,7 @@ struct VerifyData { const auto ext = test_view_h.extent(0); if (ext > 0) { for (std::size_t i = 0; i < ext; ++i) { - if (std::is_same::value) { + if (std::is_same_v) { ASSERT_EQ(gold_h(i), test_view_h(i)); } else { const auto error = diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp index 8327bfe13c..9e30630f07 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsIsSortedUntil.cpp @@ -153,12 +153,13 @@ void run_single_scenario(const InfoType& scenario_info) { #if !defined KOKKOS_ENABLE_OPENMPTARGET CustomLessThanComparator comp; - auto r5 = + [[maybe_unused]] auto r5 = KE::is_sorted_until(exespace(), KE::cbegin(view), KE::cend(view), comp); - auto r6 = KE::is_sorted_until("label", exespace(), KE::cbegin(view), - KE::cend(view), comp); - auto r7 = KE::is_sorted_until(exespace(), view, comp); - auto r8 = KE::is_sorted_until("label", exespace(), view, comp); + [[maybe_unused]] auto r6 = KE::is_sorted_until( + "label", exespace(), KE::cbegin(view), KE::cend(view), comp); + [[maybe_unused]] auto r7 = KE::is_sorted_until(exespace(), view, comp); + [[maybe_unused]] 
auto r8 = + KE::is_sorted_until("label", exespace(), view, comp); #endif ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{}); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp index 6918185bc0..1fbeab3d9d 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsModOps.cpp @@ -53,13 +53,13 @@ TEST(std_algorithms_mod_ops_test, move) { // move constr MyMovableType b(std::move(a)); ASSERT_EQ(b.m_value, 11); - ASSERT_EQ(a.m_value, -2); + ASSERT_EQ(a.m_value, -2); // NOLINT(bugprone-use-after-move) // move assign MyMovableType c; c = std::move(b); ASSERT_EQ(c.m_value, 11); - ASSERT_EQ(b.m_value, -4); + ASSERT_EQ(b.m_value, -4); // NOLINT(bugprone-use-after-move) } template @@ -70,7 +70,7 @@ struct StdAlgoModSeqOpsTestMove { void operator()(const int index) const { typename ViewType::value_type a{11}; using move_t = decltype(std::move(a)); - static_assert(std::is_rvalue_reference::value); + static_assert(std::is_rvalue_reference_v); m_view(index) = std::move(a); } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp index 0933c4e135..a3d7df533b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsNumerics.cpp @@ -243,16 +243,15 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view, ViewType2 second_view, ValueType init_value, ValueType result_value, - Args&&... args) { + Args const&... 
args) { // trivial cases const auto r1 = KE::transform_reduce( ExecutionSpace(), KE::cbegin(first_view), KE::cbegin(first_view), - KE::cbegin(second_view), init_value, std::forward(args)...); + KE::cbegin(second_view), init_value, args...); - const auto r2 = - KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(first_view), - KE::cbegin(first_view), KE::cbegin(second_view), - init_value, std::forward(args)...); + const auto r2 = KE::transform_reduce( + "MYLABEL", ExecutionSpace(), KE::cbegin(first_view), + KE::cbegin(first_view), KE::cbegin(second_view), init_value, args...); ASSERT_EQ(r1, init_value); ASSERT_EQ(r2, init_value); @@ -260,18 +259,16 @@ void run_and_check_transform_reduce_overloadA(ViewType1 first_view, // non trivial cases const auto r3 = KE::transform_reduce( ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view), - KE::cbegin(second_view), init_value, std::forward(args)...); + KE::cbegin(second_view), init_value, args...); const auto r4 = KE::transform_reduce( "MYLABEL", ExecutionSpace(), KE::cbegin(first_view), KE::cend(first_view), - KE::cbegin(second_view), init_value, std::forward(args)...); + KE::cbegin(second_view), init_value, args...); - const auto r5 = - KE::transform_reduce(ExecutionSpace(), first_view, second_view, - init_value, std::forward(args)...); - const auto r6 = - KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, second_view, - init_value, std::forward(args)...); + const auto r5 = KE::transform_reduce(ExecutionSpace(), first_view, + second_view, init_value, args...); + const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), first_view, + second_view, init_value, args...); ASSERT_EQ(r3, result_value); ASSERT_EQ(r4, result_value); @@ -363,32 +360,30 @@ template void run_and_check_transform_reduce_overloadB(ViewType view, ValueType init_value, ValueType result_value, - Args&&... args) { + Args const&... 
args) { // trivial - const auto r1 = - KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), KE::cbegin(view), - init_value, std::forward(args)...); + const auto r1 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), + KE::cbegin(view), init_value, args...); - const auto r2 = KE::transform_reduce("MYLABEL", ExecutionSpace(), - KE::cbegin(view), KE::cbegin(view), - init_value, std::forward(args)...); + const auto r2 = + KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), + KE::cbegin(view), init_value, args...); ASSERT_EQ(r1, init_value); ASSERT_EQ(r2, init_value); // non trivial - const auto r3 = - KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), KE::cend(view), - init_value, std::forward(args)...); + const auto r3 = KE::transform_reduce(ExecutionSpace(), KE::cbegin(view), + KE::cend(view), init_value, args...); - const auto r4 = KE::transform_reduce("MYLABEL", ExecutionSpace(), - KE::cbegin(view), KE::cend(view), - init_value, std::forward(args)...); - const auto r5 = KE::transform_reduce(ExecutionSpace(), view, init_value, - std::forward(args)...); + const auto r4 = + KE::transform_reduce("MYLABEL", ExecutionSpace(), KE::cbegin(view), + KE::cend(view), init_value, args...); + const auto r5 = + KE::transform_reduce(ExecutionSpace(), view, init_value, args...); const auto r6 = KE::transform_reduce("MYLABEL", ExecutionSpace(), view, - init_value, std::forward(args)...); + init_value, args...); ASSERT_EQ(r3, result_value); ASSERT_EQ(r4, result_value); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp index bf5c2ee782..b9545e8b2e 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsRotate.cpp @@ -196,7 +196,7 @@ void run_single_scenario(const InfoType& scenario_info, // create host copy BEFORE rotate or view will be modified auto view_h = create_host_space_copy(view); auto 
rit = KE::rotate(exespace(), view, rotation_point); - // verify_data(rit, view, view_h, rotation_point); + verify_data(rit, view, view_h, rotation_point); } { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp index 5a2c046939..1dfdcfd568 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentDifference.cpp @@ -191,6 +191,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { ASSERT_EQ(stdDistance, distancesView_h(i)); break; } + default: Kokkos::abort("unreachable"); } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp index 95f2934e01..88fc649a9b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamAdjacentFind.cpp @@ -217,6 +217,7 @@ void test_A(const bool ensureAdjacentFindCanFind, std::size_t numTeams, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp index 82cce0b384..592bb4c864 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamEqual.cpp @@ -244,6 +244,7 @@ void test_A(const bool viewsAreEqual, std::size_t numTeams, std::size_t numCols, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp index 0c35c5e599..0c9f1e1bd2 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp +++ 
b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamExclusiveScan.cpp @@ -224,6 +224,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { break; } #endif + default: Kokkos::abort("unreachable"); } #undef exclusive_scan diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp index d350bc62cd..21a905be56 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindEnd.cpp @@ -227,6 +227,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams, break; } + default: Kokkos::abort("unreachable"); } if (sequencesExist) { diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp index e992882e91..ad1043362e 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindFirstOf.cpp @@ -244,6 +244,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp index 70f2be77f6..f21f947e97 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIf.cpp @@ -57,14 +57,7 @@ struct TestFunctorA { const auto myRowIndex = member.league_rank(); auto myRowViewFrom = Kokkos::subview(m_dataView, myRowIndex, Kokkos::ALL()); const auto val = m_greaterThanValuesView(myRowIndex); - // FIXME_INTEL -#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL) - GreaterEqualFunctor< - typename GreaterThanValuesViewType::non_const_value_type> - unaryPred{val}; -#else GreaterEqualFunctor unaryPred{val}; -#endif ptrdiff_t 
resultDist = 0; switch (m_apiPick) { @@ -185,12 +178,7 @@ void test_A(const bool predicatesReturnTrue, std::size_t numTeams, const auto rowFromBegin = KE::cbegin(rowFrom); const auto rowFromEnd = KE::cend(rowFrom); const auto val = greaterEqualValuesView_h(i); - // FIXME_INTEL -#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL) - const GreaterEqualFunctor unaryPred{val}; -#else const GreaterEqualFunctor unaryPred{val}; -#endif auto it = std::find_if(rowFromBegin, rowFromEnd, unaryPred); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp index 873e8faf4c..0794dc0a79 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamFindIfNot.cpp @@ -57,14 +57,7 @@ struct TestFunctorA { const auto myRowIndex = member.league_rank(); auto myRowViewFrom = Kokkos::subview(m_dataView, myRowIndex, Kokkos::ALL()); const auto val = m_greaterThanValuesView(myRowIndex); - // FIXME_INTEL -#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL) - GreaterEqualFunctor< - typename GreaterThanValuesViewType::non_const_value_type> - unaryPred{val}; -#else GreaterEqualFunctor unaryPred{val}; -#endif ptrdiff_t resultDist = 0; switch (m_apiPick) { @@ -180,12 +173,7 @@ void test_A(const bool predicatesReturnTrue, std::size_t numTeams, const auto rowFromBegin = KE::cbegin(rowFrom); const auto rowFromEnd = KE::cend(rowFrom); const auto val = greaterEqualValuesView_h(i); - // FIXME_INTEL -#if defined(KOKKOS_COMPILER_INTEL) && (1900 == KOKKOS_COMPILER_INTEL) - const GreaterEqualFunctor unaryPred{val}; -#else const GreaterEqualFunctor unaryPred{val}; -#endif auto it = std::find_if_not(rowFromBegin, rowFromEnd, unaryPred); diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp index 
b5f4cdd612..4c77eff9c4 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamInclusiveScan.cpp @@ -253,6 +253,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { break; } + default: Kokkos::abort("unreachable"); } #undef inclusive_scan diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp index c377b9fec8..9d2d2721c6 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamLexicographicalCompare.cpp @@ -245,6 +245,7 @@ void test_A(const TestCaseType testCase, std::size_t numTeams, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp index 84269511d8..9b245508e3 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamMismatch.cpp @@ -249,6 +249,7 @@ void test_A(const bool viewsAreEqual, std::size_t numTeams, std::size_t numCols, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp index eb00d9e083..88264b45c0 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamReduce.cpp @@ -242,6 +242,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { break; } + default: Kokkos::abort("unreachable"); } #undef reduce diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp index 
039db4095d..1f0f4b6c1b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearch.cpp @@ -243,6 +243,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp index 25cd1471e0..6d8a34e842 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamSearchN.cpp @@ -258,6 +258,7 @@ void test_A(const bool sequencesExist, std::size_t numTeams, break; } + default: Kokkos::abort("unreachable"); } } } diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp index 1c43854381..60e199a350 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformExclusiveScan.cpp @@ -203,6 +203,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { ASSERT_EQ(stdDistance, distancesView_h(i)); break; } + default: Kokkos::abort("unreachable"); } #undef transform_exclusive_scan diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp index 78a21c4430..0dc3e68b1d 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformInclusiveScan.cpp @@ -240,6 +240,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { break; } + default: Kokkos::abort("unreachable"); } } #undef transform_inclusive_scan diff --git 
a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp index 17ded226aa..3ad0b5b354 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTeamTransformReduce.cpp @@ -293,6 +293,7 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) { break; } + default: Kokkos::abort("unreachable"); } #undef transform_reduce diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp index 365ca21688..e3114daeae 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformExclusiveScan.cpp @@ -344,8 +344,7 @@ TEST(std_algorithms_numeric_ops_test, transform_exclusive_scan_functor) { using view_type = Kokkos::View; view_type dummy_view("dummy_view", 0); using unary_op_type = - Kokkos::Experimental::Impl::StdNumericScanIdentityReferenceUnaryFunctor< - int>; + Kokkos::Experimental::Impl::StdNumericScanIdentityReferenceUnaryFunctor; using functor_type = Kokkos::Experimental::Impl::TransformExclusiveScanFunctorWithValueWrapper< exespace, int, int, view_type, view_type, MultiplyFunctor, diff --git a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp index cc87262147..2dda12e22d 100644 --- a/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestStdAlgorithmsTransformInclusiveScan.cpp @@ -390,8 +390,7 @@ TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan_functor) { int dummy = 0; using view_type = Kokkos::View; view_type dummy_view("dummy_view", 0); - using unary_op_type = - 
KE::Impl::StdNumericScanIdentityReferenceUnaryFunctor; + using unary_op_type = KE::Impl::StdNumericScanIdentityReferenceUnaryFunctor; { using functor_type = KE::Impl::ExeSpaceTransformInclusiveScanNoInitValueFunctor< diff --git a/lib/kokkos/benchmarks/atomic/Makefile b/lib/kokkos/benchmarks/atomic/Makefile index 636c0ad4ab..c59de75ce8 100644 --- a/lib/kokkos/benchmarks/atomic/Makefile +++ b/lib/kokkos/benchmarks/atomic/Makefile @@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_ARCH = "SNB,Volta70" +KOKKOS_USE_DEPRECATED_MAKEFILES=1 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/lib/kokkos/benchmarks/bytes_and_flops/Makefile b/lib/kokkos/benchmarks/bytes_and_flops/Makefile index 1aa4edddcd..4b6f084d20 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/Makefile +++ b/lib/kokkos/benchmarks/bytes_and_flops/Makefile @@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_ARCH = "SNB,Volta70" +KOKKOS_USE_DEPRECATED_MAKEFILES=1 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/lib/kokkos/benchmarks/gather/Makefile b/lib/kokkos/benchmarks/gather/Makefile index 6827995bed..e1bfce21a6 100644 --- a/lib/kokkos/benchmarks/gather/Makefile +++ b/lib/kokkos/benchmarks/gather/Makefile @@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_ARCH = "SNB,Volta70" +KOKKOS_USE_DEPRECATED_MAKEFILES=1 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/lib/kokkos/benchmarks/launch_latency/launch_latency.cpp b/lib/kokkos/benchmarks/launch_latency/launch_latency.cpp index 156c29af09..0935706ee8 100644 --- a/lib/kokkos/benchmarks/launch_latency/launch_latency.cpp +++ b/lib/kokkos/benchmarks/launch_latency/launch_latency.cpp @@ -37,7 +37,7 @@ template struct TestFunctor { - double values[V]; + double values[V] = {}; Kokkos::View a; int K; TestFunctor(Kokkos::View a_, int K_) : a(a_), K(K_) {} @@ -50,7 +50,7 
@@ struct TestFunctor { template struct TestRFunctor { - double values[V]; + double values[V] = {}; Kokkos::View a; int K; TestRFunctor(Kokkos::View a_, int K_) : a(a_), K(K_) {} @@ -247,12 +247,15 @@ int main(int argc, char* argv[]) { // anything that doesn't start with -- if (arg.size() < 2 || (arg.size() >= 2 && arg[0] != '-' && arg[1] != '-')) { + // signing off that arg.data() is null terminated + // NOLINTBEGIN(bugprone-suspicious-stringview-data-usage) if (i == 1) N = atoi(arg.data()); else if (i == 2) M = atoi(arg.data()); else if (i == 3) K = atoi(arg.data()); + // NOLINTEND(bugprone-suspicious-stringview-data-usage) else { Kokkos::abort("unexpected argument!"); } diff --git a/lib/kokkos/benchmarks/policy_performance/Makefile b/lib/kokkos/benchmarks/policy_performance/Makefile index f50aea720e..21365f36c6 100644 --- a/lib/kokkos/benchmarks/policy_performance/Makefile +++ b/lib/kokkos/benchmarks/policy_performance/Makefile @@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_ARCH = "SNB,Volta70" +KOKKOS_USE_DEPRECATED_MAKEFILES=1 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/lib/kokkos/benchmarks/policy_performance/main.cpp b/lib/kokkos/benchmarks/policy_performance/main.cpp index 0983a3d535..dd61ba6502 100644 --- a/lib/kokkos/benchmarks/policy_performance/main.cpp +++ b/lib/kokkos/benchmarks/policy_performance/main.cpp @@ -120,11 +120,12 @@ int main(int argc, char* argv[]) { // view appropriately for test and should obey first-touch etc Second call to // test is the one we actually care about and time view_type_1d v_1(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_1"), - team_range * team_size); + static_cast(team_range) * team_size); view_type_2d v_2(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_2"), - team_range * team_size, thread_range); + static_cast(team_range) * team_size, thread_range); view_type_3d v_3(Kokkos::view_alloc(Kokkos::WithoutInitializing, "v_3"), - team_range * 
team_size, thread_range, vector_range); + static_cast(team_range) * team_size, thread_range, + vector_range); double result_computed = 0.0; double result_expect = 0.0; diff --git a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp index 0e23d221f6..8a874e0139 100644 --- a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp +++ b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp @@ -367,7 +367,7 @@ void test_policy(int team_range, int thread_range, int vector_range, // parallel_for RangePolicy: range = team_size*team_range if (test_type == 300) { Kokkos::parallel_for( - "300 outer for", team_size * team_range, + "300 outer for", static_cast(team_size) * team_range, KOKKOS_LAMBDA(const int idx) { v1(idx) = idx; // prevent compiler from optimizing away the loop @@ -376,14 +376,15 @@ void test_policy(int team_range, int thread_range, int vector_range, // parallel_reduce RangePolicy: range = team_size*team_range if (test_type == 400) { Kokkos::parallel_reduce( - "400 outer reduce", team_size * team_range, + "400 outer reduce", static_cast(team_size) * team_range, KOKKOS_LAMBDA(const int idx, double& val) { val += idx; }, result); result_expect = 0.5 * (team_size * team_range) * (team_size * team_range - 1); } // parallel_scan RangePolicy: range = team_size*team_range if (test_type == 500) { - Kokkos::parallel_scan("500 outer scan", team_size * team_range, + Kokkos::parallel_scan("500 outer scan", + static_cast(team_size) * team_range, ParallelScanFunctor(v1) #if 0 // This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation diff --git a/lib/kokkos/benchmarks/stream/Makefile b/lib/kokkos/benchmarks/stream/Makefile index 47a13838a4..529e789247 100644 --- a/lib/kokkos/benchmarks/stream/Makefile +++ b/lib/kokkos/benchmarks/stream/Makefile @@ -2,6 +2,7 @@ KOKKOS_DEVICES=Cuda KOKKOS_CUDA_OPTIONS=enable_lambda KOKKOS_ARCH = "SNB,Volta70" 
+KOKKOS_USE_DEPRECATED_MAKEFILES=1 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/lib/kokkos/benchmarks/view_copy_constructor/Makefile b/lib/kokkos/benchmarks/view_copy_constructor/Makefile index 70c6d517e0..77845a22b1 100644 --- a/lib/kokkos/benchmarks/view_copy_constructor/Makefile +++ b/lib/kokkos/benchmarks/view_copy_constructor/Makefile @@ -1,6 +1,7 @@ KOKKOS_DEVICES=Serial KOKKOS_ARCH = "" +KOKKOS_USE_DEPRECATED_MAKEFILES=1 MAKEFILE_PATH := $(subst Makefile,,$(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper index d58645f98a..8d3dbf1c75 100755 --- a/lib/kokkos/bin/nvcc_wrapper +++ b/lib/kokkos/bin/nvcc_wrapper @@ -317,7 +317,7 @@ do # End of Werror handling #Handle unsupported standard flags --std=c++1y|-std=c++1y|--std=gnu++1y|-std=gnu++1y|--std=c++1z|-std=c++1z|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a) - fallback_std_flag="-std=c++14" + fallback_std_flag="-std=c++17" # this is hopefully just occurring in a downstream project during CMake feature tests # we really have no choice here but to accept the flag and change to an accepted C++ standard echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++17 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." 
@@ -346,35 +346,17 @@ do # NVCC only has C++20 from version 12 on cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]}) if [ ${cuda_main_version} -lt 12 ]; then - fallback_std_flag="-std=c++14" + fallback_std_flag="-std=c++17" # this is hopefully just occurring in a downstream project during CMake feature tests # we really have no choice here but to accept the flag and change to an accepted C++ standard - echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." + echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++17 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." std_flag=$fallback_std_flag else std_flag=$1 fi shared_args="$shared_args $std_flag" ;; - --std=c++17|-std=c++17) - if [ -n "$std_flag" ]; then - warn_std_flag - shared_args=${shared_args/ $std_flag/} - fi - # NVCC only has C++17 from version 11 on - cuda_main_version=$([[ $(${nvcc_compiler} --version) =~ V([0-9]+) ]] && echo ${BASH_REMATCH[1]}) - if [ ${cuda_main_version} -lt 11 ]; then - fallback_std_flag="-std=c++14" - # this is hopefully just occurring in a downstream project during CMake feature tests - # we really have no choice here but to accept the flag and change to an accepted C++ standard - echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration." 
- std_flag=$fallback_std_flag - else - std_flag=$1 - fi - shared_args="$shared_args $std_flag" - ;; - --std=c++11|-std=c++11|--std=c++14|-std=c++14) + --std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++17|-std=c++17) if [ -n "$std_flag" ]; then warn_std_flag shared_args=${shared_args/ $std_flag/} @@ -500,6 +482,20 @@ do xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}" host_linker_args="$host_linker_args ${1:4:${#1}}" ;; + #Handle host assembler options + -Wa,*) + #To pass the -Wa options to the host compiler via -Xcompiler it is necessary + #to use '\\,' for each comma in the options. As users might already add escapes + #to the comma by themselves, the escapes are first removed and then only the + #required number of \ are added back. + xcompiler_args_wa=$(echo -e "$1" | sed -E 's/\\\+,/,/g' | sed -E 's/,/\\\\\\\,/g') + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="$xcompiler_args_wa" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$xcompiler_args_wa" + fi + ;; #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking *.a|*.so|*.o|*.obj) object_files="$object_files $1" diff --git a/lib/kokkos/cmake/KokkosConfig.cmake.in b/lib/kokkos/cmake/KokkosConfig.cmake.in index 1b6d1b66ff..aed9f1060c 100644 --- a/lib/kokkos/cmake/KokkosConfig.cmake.in +++ b/lib/kokkos/cmake/KokkosConfig.cmake.in @@ -2,65 +2,71 @@ # loaded by include() and find_package() commands except when invoked with # the NO_POLICY_SCOPE option # CMP0057 + NEW -> IN_LIST operator in IF(...) 
-CMAKE_POLICY(SET CMP0057 NEW) +cmake_policy(SET CMP0057 NEW) # Compute paths @PACKAGE_INIT@ #Find dependencies -INCLUDE(CMakeFindDependencyMacro) +include(CMakeFindDependencyMacro) #This needs to go above the KokkosTargets in case #the Kokkos targets depend in some way on the TPL imports @KOKKOS_TPL_EXPORTS@ -GET_FILENAME_COMPONENT(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) -INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") -INCLUDE("${Kokkos_CMAKE_DIR}/KokkosConfigCommon.cmake") -UNSET(Kokkos_CMAKE_DIR) +get_filename_component(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +include("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") +include("${Kokkos_CMAKE_DIR}/KokkosConfigCommon.cmake") +unset(Kokkos_CMAKE_DIR) # check for conflicts -IF("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS AND - "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS) - MESSAGE(STATUS "'launch_compiler' implies global redirection of targets depending on Kokkos to appropriate compiler.") - MESSAGE(STATUS "'separable_compilation' implies explicitly defining where redirection occurs via 'kokkos_compilation(PROJECT|TARGET|SOURCE|DIRECTORY ...)'") - MESSAGE(FATAL_ERROR "Conflicting COMPONENTS: 'launch_compiler' and 'separable_compilation'") -ENDIF() +if("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS AND "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS) + message(STATUS "'launch_compiler' implies global redirection of targets depending on Kokkos to appropriate compiler.") + message( + STATUS + "'separable_compilation' implies explicitly defining where redirection occurs via 'kokkos_compilation(PROJECT|TARGET|SOURCE|DIRECTORY ...)'" + ) + message(FATAL_ERROR "Conflicting COMPONENTS: 'launch_compiler' and 'separable_compilation'") +endif() -IF("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS) - # - # if find_package(Kokkos COMPONENTS launch_compiler) then rely on the - # RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK to always redirect to the - # appropriate compiler for Kokkos - # 
+if("launch_compiler" IN_LIST Kokkos_FIND_COMPONENTS) + # + # if find_package(Kokkos COMPONENTS launch_compiler) then rely on the + # RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK to always redirect to the + # appropriate compiler for Kokkos + # - MESSAGE(STATUS "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos") - kokkos_compilation( - GLOBAL - CHECK_CUDA_COMPILES) + message( + STATUS + "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos" + ) + kokkos_compilation(GLOBAL CHECK_CUDA_COMPILES) -ELSEIF(@Kokkos_ENABLE_CUDA@ - AND NOT @KOKKOS_COMPILE_LANGUAGE@ STREQUAL CUDA - AND NOT "separable_compilation" IN_LIST Kokkos_FIND_COMPONENTS) - # - # if CUDA was enabled, the compilation language was not set to CUDA, and separable compilation was not - # specified, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and - # kokkos_launch_compiler will re-direct to the compiler used to compile CUDA code during installation. - # kokkos_launch_compiler will re-direct if ${CMAKE_CXX_COMPILER} and -DKOKKOS_DEPENDENCE is present, - # otherwise, the original command will be executed - # +elseif(@Kokkos_ENABLE_CUDA@ AND NOT @KOKKOS_COMPILE_LANGUAGE@ STREQUAL CUDA AND NOT "separable_compilation" IN_LIST + Kokkos_FIND_COMPONENTS +) + # + # if CUDA was enabled, the compilation language was not set to CUDA, and separable compilation was not + # specified, then set the RULE_LAUNCH_COMPILE and RULE_LAUNCH_LINK globally and + # kokkos_launch_compiler will re-direct to the compiler used to compile CUDA code during installation. 
+ # kokkos_launch_compiler will re-direct if ${CMAKE_CXX_COMPILER} and -DKOKKOS_DEPENDENCE is present, + # otherwise, the original command will be executed + # - # run test to see if CMAKE_CXX_COMPILER=nvcc_wrapper - kokkos_compiler_is_nvcc(IS_NVCC ${CMAKE_CXX_COMPILER}) + # run test to see if CMAKE_CXX_COMPILER=nvcc_wrapper + kokkos_compiler_is_nvcc(IS_NVCC ${CMAKE_CXX_COMPILER}) - # if not nvcc_wrapper and Kokkos_LAUNCH_COMPILER was not set to OFF - IF(NOT IS_NVCC AND (NOT DEFINED Kokkos_LAUNCH_COMPILER OR Kokkos_LAUNCH_COMPILER)) - MESSAGE(STATUS "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos") - kokkos_compilation(GLOBAL) - ENDIF() + # if not nvcc_wrapper and Kokkos_LAUNCH_COMPILER was not set to OFF + if(NOT IS_NVCC AND (NOT DEFINED Kokkos_LAUNCH_COMPILER OR Kokkos_LAUNCH_COMPILER)) + message( + STATUS + "kokkos_launch_compiler is enabled globally. C++ compiler commands with -DKOKKOS_DEPENDENCE will be redirected to the appropriate compiler for Kokkos" + ) + kokkos_compilation(GLOBAL) + endif() - # be mindful of the environment, pollution is bad - UNSET(IS_NVCC) -ENDIF() + # be mindful of the environment, pollution is bad + unset(IS_NVCC) +endif() set(Kokkos_COMPILE_LANGUAGE @KOKKOS_COMPILE_LANGUAGE@) diff --git a/lib/kokkos/cmake/KokkosConfigCommon.cmake.in b/lib/kokkos/cmake/KokkosConfigCommon.cmake.in index d3ac39ffa3..769dff6b10 100644 --- a/lib/kokkos/cmake/KokkosConfigCommon.cmake.in +++ b/lib/kokkos/cmake/KokkosConfigCommon.cmake.in @@ -1,67 +1,67 @@ -SET(Kokkos_DEVICES @KOKKOS_ENABLED_DEVICES@) -SET(Kokkos_OPTIONS @KOKKOS_ENABLED_OPTIONS@) -SET(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@) -SET(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@) -SET(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@") -SET(Kokkos_CXX_COMPILER_ID "@KOKKOS_CXX_COMPILER_ID@") -SET(Kokkos_CXX_COMPILER_VERSION "@KOKKOS_CXX_COMPILER_VERSION@") -SET(Kokkos_CXX_STANDARD @KOKKOS_CXX_STANDARD@) 
+set(Kokkos_DEVICES @KOKKOS_ENABLED_DEVICES@) +set(Kokkos_OPTIONS @KOKKOS_ENABLED_OPTIONS@) +set(Kokkos_TPLS @KOKKOS_ENABLED_TPLS@) +set(Kokkos_ARCH @KOKKOS_ENABLED_ARCH_LIST@) +set(Kokkos_CXX_COMPILER "@CMAKE_CXX_COMPILER@") +set(Kokkos_CXX_COMPILER_ID "@KOKKOS_CXX_COMPILER_ID@") +set(Kokkos_CXX_COMPILER_VERSION "@KOKKOS_CXX_COMPILER_VERSION@") +set(Kokkos_CXX_STANDARD @KOKKOS_CXX_STANDARD@) # Required to be a TriBITS-compliant external package -IF(NOT TARGET Kokkos::all_libs) +if(NOT TARGET Kokkos::all_libs) # CMake Error at /lib/cmake/Kokkos/KokkosConfigCommon.cmake:10 (ADD_LIBRARY): # ADD_LIBRARY cannot create ALIAS target "Kokkos::all_libs" because target # "Kokkos::kokkos" is imported but not globally visible. - IF(CMAKE_VERSION VERSION_LESS "3.18") - SET_TARGET_PROPERTIES(Kokkos::kokkos PROPERTIES IMPORTED_GLOBAL ON) - ENDIF() - ADD_LIBRARY(Kokkos::all_libs ALIAS Kokkos::kokkos) -ENDIF() + if(CMAKE_VERSION VERSION_LESS "3.18") + set_target_properties(Kokkos::kokkos PROPERTIES IMPORTED_GLOBAL ON) + endif() + add_library(Kokkos::all_libs ALIAS Kokkos::kokkos) +endif() # Export Kokkos_ENABLE_ for each backend that was enabled. # NOTE: "Devices" is a little bit of a misnomer here. These are really # backends, e.g. Kokkos_ENABLE_OPENMP, Kokkos_ENABLE_CUDA, Kokkos_ENABLE_HIP, # or Kokkos_ENABLE_SYCL. -FOREACH(DEV ${Kokkos_DEVICES}) - SET(Kokkos_ENABLE_${DEV} ON) -ENDFOREACH() +foreach(DEV ${Kokkos_DEVICES}) + set(Kokkos_ENABLE_${DEV} ON) +endforeach() # Export relevant Kokkos_ENABLE