Merge branch 'lammps:develop' into ml-uf3

This commit is contained in:
Ajinkya Hire
2024-04-13 18:56:07 -04:00
committed by GitHub
476 changed files with 16747 additions and 14762 deletions

2
.github/CODEOWNERS vendored
View File

@ -84,7 +84,7 @@ src/bond.* @sjplimp
src/comm*.* @sjplimp
src/compute.* @sjplimp
src/dihedral.* @sjplimp
src/domain.* @sjplimp
src/domain.* @sjplimp @stanmoore1
src/dump*.* @sjplimp
src/error.* @sjplimp
src/finish.* @sjplimp

View File

@ -45,8 +45,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.2.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "16b9b09ae947d434dfb58fc5c87c2b76" CACHE STRING "MD5 checksum of KOKKOS tarball")
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "889dcea2b5ced3debdc5b0820044bdc4" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5)
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
@ -71,7 +71,7 @@ if(DOWNLOAD_KOKKOS)
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 4.2.01 REQUIRED CONFIG)
find_package(Kokkos 4.3.00 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else()
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)

View File

@ -533,9 +533,6 @@ They must be specified in uppercase.
* - A64FX
- HOST
- ARMv8.2 with SVE Support
* - WSM
- HOST
- Intel Westmere CPU (SSE 4.2)
* - SNB
- HOST
- Intel Sandy/Ivy Bridge CPU (AVX 1)
@ -566,18 +563,15 @@ They must be specified in uppercase.
* - KNL
- HOST
- Intel Knights Landing Xeon Phi
* - BGQ
- HOST
- IBM Blue Gene/Q CPU
* - POWER7
- HOST
- IBM POWER7 CPU
* - POWER8
- HOST
- IBM POWER8 CPU
* - POWER9
- HOST
- IBM POWER9 CPU
* - RISCV_SG2042
- HOST
- SG2042 (RISC-V) CPU
* - KEPLER30
- GPU
- NVIDIA Kepler generation CC 3.0 GPU
@ -666,7 +660,7 @@ They must be specified in uppercase.
- GPU
- Intel GPU Ponte Vecchio
This list was last updated for version 4.2 of the Kokkos library.
This list was last updated for version 4.3.0 of the Kokkos library.
.. tabs::

View File

@ -245,6 +245,7 @@ OPT.
* :doc:`oxrna2/coaxstk <pair_oxrna2>`
* :doc:`pace (k) <pair_pace>`
* :doc:`pace/extrapolation (k) <pair_pace>`
* :doc:`pedone (o) <pair_pedone>`
* :doc:`pod <pair_pod>`
* :doc:`peri/eps <pair_peri>`
* :doc:`peri/lps (o) <pair_peri>`

View File

@ -635,10 +635,10 @@ Tohoku University (under MIT license)
----------
.. doxygenfunction:: MathEigen::jacobi3(double const *const *mat, double *eval, double **evec)
.. doxygenfunction:: MathEigen::jacobi3(double const *const *mat, double *eval, double **evec, int sort)
:project: progguide
.. doxygenfunction:: MathEigen::jacobi3(double const mat[3][3], double *eval, double evec[3][3])
.. doxygenfunction:: MathEigen::jacobi3(double const mat[3][3], double *eval, double evec[3][3], int sort)
:project: progguide
---------------------------

View File

@ -13,15 +13,44 @@ discussions of such cases.
Unknown identifier in data file
-------------------------------
This error happens when LAMMPS encounters a line of text in an unexpected format
while reading a data file. This is most commonly cause by inconsistent header and
section data. The header section informs LAMMPS how many entries or lines are expected in the
various sections (like Atoms, Masses, Pair Coeffs, *etc.*\ ) of the data file.
If there is a mismatch, LAMMPS will either keep reading beyond the end of a section
or stop reading before the section has ended.
This error happens when LAMMPS encounters a line of text with an
unexpected keyword while :doc:`reading a data file <read_data>`. This
would be either header keywords or section header keywords. This is
most commonly due to a mistyped keyword or due to a keyword that is
inconsistent with the :doc:`atom style <atom_style>` used.
Such a mismatch can happen unexpectedly when the first line of the data
is *not* a comment as required by the format. That would result in
LAMMPS expecting, for instance, 0 atoms because the "atoms" header line
is treated as a comment.
The header section informs LAMMPS how many entries or lines are expected
in the various sections (like Atoms, Masses, Pair Coeffs, *etc.*\ ) of
the data file. If there is a mismatch, LAMMPS will either keep reading
beyond the end of a section or stop reading before the section has
ended. In that case the next line will not contain a recognized keyword.
Such a mismatch can also happen when the first line of the data
is *not* a comment as required by the format, but a line with a valid
header keyword. That would result in LAMMPS expecting, for instance,
0 atoms because the "atoms" header line is the first line and thus
treated as a comment.
Another possibility to trigger this error is to have a keyword in the
data file that corresponds to a fix (e.g. :doc:`fix cmap <fix_cmap>`)
but the :doc:`read_data <read_data>` command is missing the (optional)
arguments that identify the fix and the header keyword and section
keyword or those arguments are inconsistent with the keywords in the
data file.
.. _err0002:
Incorrect format in ... section of data file
--------------------------------------------
This error happens when LAMMPS reads the contents of a section of a
:doc:`data file <read_data>` and the number of parameters in the line
differs from what is expected. This most commonly happens, when the
atom style is different from what is expected for a specific data file
since changing the atom style usually changes the format of the line.
This error can also happen when the number of entries indicated in the
header of a data file (e.g. the number of atoms) is larger than the
number of lines provided (e.g. in the corresponding Atoms section)
and then LAMMPS will continue reading into the next section and that
would have a completely different format.

View File

@ -65,7 +65,6 @@ Examples
fix 1 all ave/correlate 1 50 10000 &
c_thermo_press[1] c_thermo_press[2] c_thermo_press[3] &
type upper ave running title1 "My correlation data"
fix 1 all ave/correlate 1 50 10000 c_thermo_press[*]
Description

View File

@ -20,11 +20,11 @@ Syntax
.. parsed-literal::
c_ID = global scalar calculated by a compute with ID
c_ID[I] = Ith component of global vector calculated by a compute with ID
c_ID[I] = Ith component of global vector calculated by a compute with ID, I can include wildcard (see below)
f_ID = global scalar calculated by a fix with ID
f_ID[I] = Ith component of global vector calculated by a fix with ID
f_ID[I] = Ith component of global vector calculated by a fix with ID, I can include wildcard (see below)
v_name = global value calculated by an equal-style variable with name
v_name[I] = Ith component of global vector calculated by a vector-style variable with name
v_name[I] = Ith component of a vector-style variable with name, I can include wildcard (see below)
* zero or more keyword/arg pairs may be appended
* keyword = *type* or *start* or *file* or *overwrite* or *title1* or *title2* or *ncorr* or *nlen* or *ncount*
@ -63,6 +63,7 @@ Examples
fix 1 all ave/correlate/long 1 10000 &
c_thermo_press[1] c_thermo_press[2] c_thermo_press[3] &
type upper title1 "My correlation data" nlen 15 ncount 3
fix 1 all ave/correlate/long 1 10000 c_thermo_press[*]
Description
"""""""""""
@ -80,8 +81,10 @@ specified values may represent calculations performed by computes and
fixes which store their own "group" definitions.
Each listed value can be the result of a compute or fix or the
evaluation of an equal-style variable. See the
:doc:`fix ave/correlate <fix_ave_correlate>` page for details.
evaluation of an equal-style or vector-style variable. For
vector-style variables, the specified indices can include a wildcard
character. See the :doc:`fix ave/correlate <fix_ave_correlate>` page
for details.
The *Nevery* and *Nfreq* arguments specify on what time steps the input
values will be used to calculate correlation data and the frequency

View File

@ -136,23 +136,23 @@ transfer between the subsystems:
\bigtriangledown (\kappa_e \bigtriangledown T_e) -
g_p (T_e - T_a) + g_s T_a'
where C_e is the specific heat, rho_e is the density, kappa_e is the
thermal conductivity, T is temperature, the "e" and "a" subscripts
represent electronic and atomic subsystems respectively, g_p is the
coupling constant for the electron-ion interaction, and g_s is the
electron stopping coupling parameter. C_e, rho_e, and kappa_e are
specified as parameters to the fix. The other quantities are derived.
The form of the heat diffusion equation used here is almost the same
as that in equation 6 of :ref:`(Duffy) <Duffy>`, with the exception that the
electronic density is explicitly represented, rather than being part
of the specific heat parameter.
where :math:`C_e` is the specific heat, :math:`\rho_e` is the density,
:math:`\kappa_e` is the thermal conductivity, *T* is temperature, the
"e" and "a" subscripts represent electronic and atomic subsystems
respectively, :math:`g_p` is the coupling constant for the electron-ion
interaction, and :math:`g_s` is the electron stopping coupling
parameter. :math:`C_e`, :math:`\rho_e`, and :math:`\kappa_e` are
specified as parameters to the fix *ttm* or *ttm/grid*. The other
quantities are derived. The form of the heat diffusion equation used
here is almost the same as that in equation 6 of :ref:`(Duffy) <Duffy>`,
with the exception that the electronic density is explicitly
represented, rather than being part of the specific heat parameter.
Currently, the TTM fixes assume that none of the user-supplied
parameters will vary with temperature. Note that :ref:`(Duffy)
<Duffy>` used a tanh() functional form for the temperature dependence
of the electronic specific heat, but ignored temperature dependencies
of any of the other parameters. See more discussion below for fix
ttm/mod.
parameters will vary with temperature. Note that :ref:`(Duffy) <Duffy>`
used a tanh() functional form for the temperature dependence of the
electronic specific heat, but ignored temperature dependencies of any of
the other parameters. See more discussion below for fix *ttm/mod*.
.. note::
@ -265,27 +265,27 @@ heat sources (e.g. laser heating in ablation simulations):
\bigtriangledown (\kappa_e \bigtriangledown T_e) -
g_p (T_e - T_a) + g_s T_a' + \theta (x-x_{surface})I_0 \exp(-x/l_{skin})
where theta is the Heaviside step function, I_0 is the (absorbed)
laser pulse intensity for ablation simulations, l_skin is the depth
of skin-layer, and all other designations have the same meaning as in
the former equation. The duration of the pulse is set by the parameter
*tau* in the *init_file*.
where :math:`\theta` is the Heaviside step function, :math:`I_0` is the
(absorbed) laser pulse intensity for ablation simulations,
:math:`l_{skin}` is the depth of the skin-layer, and all other
designations have the same meaning as in the former equation. The
duration of the pulse is set by the parameter *tau* in the *init_file*.
Fix ttm/mod also allows users to specify the dependencies of C_e and
kappa_e on the electronic temperature. The specific heat is expressed
as
Fix *ttm/mod* also allows users to specify the dependencies of
:math:`C_e` and :math:`\kappa_e` on the electronic temperature. The
specific heat is expressed as
.. math::
C_e = C_0 + (a_0 + a_1 X + a_2 X^2 + a_3 X^3 + a_4 X^4) \exp (-(AX)^2)
where *X* = T_e/1000, and the thermal conductivity is defined as
kappa_e = D_e\*rho_e\*C_e, where D_e is the thermal diffusion
coefficient.
where :math:`X = \frac{T_e}{1000}`, and the thermal conductivity is
defined as :math:`\kappa_e = D_e \cdot rho_e \cdot C_e`, where
:math:`D_e` is the thermal diffusion coefficient.
Electronic pressure effects are included in the TTM model to account
for the blast force acting on ions because of electronic pressure
gradient (see :ref:`(Chen) <Chen>`, :ref:`(Norman) <Norman>`). The total force
Electronic pressure effects are included in the TTM model to account for
the blast force acting on ions because of electronic pressure gradient
(see :ref:`(Chen) <Chen>`, :ref:`(Norman) <Norman>`). The total force
acting on an ion is:
.. math::
@ -293,13 +293,14 @@ acting on an ion is:
{\vec F}_i = - \partial U / \partial {\vec r}_i + {\vec
F}_{langevin} - \nabla P_e/n_{ion}
where F_langevin is a force from Langevin thermostat simulating
electron-phonon coupling, and nabla P_e/n_ion is the electron blast
force.
where :math:`F_{langevin}` is a force from Langevin thermostat
simulating electron-phonon coupling, and :math:`\nabla P_e/n_{ion}` is
the electron blast force.
The electronic pressure is taken to be P_e = B\*rho_e\*C_e\*T_e
The electronic pressure is taken to be :math:`P_e = B \cdot rho_e \cdot
C_e \cdot T_e`
The current fix ttm/mod implementation allows TTM simulations with a
The current fix *ttm/mod* implementation allows TTM simulations with a
vacuum. The vacuum region is defined as the grid cells with zero
electronic temperature. The numerical scheme does not allow energy
exchange with such cells. Since the material can expand to previously
@ -319,10 +320,10 @@ electronic pressure gradient is calculated as
\frac{x}{x+\lambda}\frac{(C_e{}T_e)_{x+\Delta
x}-(C_e{}T_e)_{x}}{\Delta x} \right]
where lambda is the electron mean free path (see :ref:`(Norman) <Norman>`,
:ref:`(Pisarev) <Pisarev>`)
where :math:`\lambda` is the electron mean free path (see :ref:`(Norman)
<Norman>`, :ref:`(Pisarev) <Pisarev>`)
The fix ttm/mod parameter file *init_file* has the following syntax.
The fix *ttm/mod* parameter file *init_file* has the following syntax.
Every line with an odd number is considered as a comment and
ignored. The lines with the even numbers are treated as follows:

137
doc/src/pair_pedone.rst Normal file
View File

@ -0,0 +1,137 @@
.. index:: pair_style pedone
.. index:: pair_style pedone/omp
pair_style pedone command
=========================
Accelerator Variants: *pedone/omp*
Syntax
""""""
.. code-block:: LAMMPS
pair_style style args
* style = pedone*
* args = list of arguments for a particular style
.. parsed-literal::
*pedone* args = cutoff
cutoff = global cutoff for Pedone interactions (distance units)
Examples
""""""""
.. code-block:: LAMMPS
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
kspace_style pppm 1.0e-5
pair_coeff * * coul/long
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
Used in input scripts:
.. parsed-literal::
examples/PACKAGES/pedone/in.pedone.relax
examples/PACKAGES/pedone/in.pedone.melt
Description
"""""""""""
.. versionadded:: TBD
Pair style *pedone* computes the **non-Coulomb** interactions of the Pedone
(or PMMCS) potential :ref:`(Pedone) <Pedone>` which combines Coulomb
interactions, Morse potential, and repulsive :math:`r^{-12}`
Lennard-Jones terms (see below). The *pedone* pair style is meant
to be used in addition to a :doc:`Coulomb pair style <pair_coul>` via
pair style :doc:`hybrid/overlay <pair_hybrid>` (see example above).
Using *coul/long* or *could/dsf* (for solids) is recommended.
The full Pedone potential function from :ref:`(Pedone) <Pedone>` for each
pair of atoms is:
.. math::
E = \frac{C q_i q_j}{\epsilon r}
+ D_0 \left[ e^{- 2 \alpha (r - r_0)} - 2 e^{- \alpha (r - r_0)} \right]
+ \frac{B_0}{r^{12}} \qquad r < r_c
:math:`r_c` is the cutoff and :math:`C` is a conversion factor that is
specific to the choice of :doc:`units <units>` so that the entire
Coulomb term is in energy units with :math:`q_i` and :math:`q_j` as the
assigned charges in multiples of the elementary charge.
The following coefficients must be defined for the selected pairs of
atom types via the :doc:`pair_coeff <pair_coeff>` command as in the
example above:
* :math:`D_0` (energy units)
* :math:`\alpha` (1/distance units)
* :math:`r_0` (distance units)
* :math:`C_0` (energy units)
* cutoff (distance units)
The last coefficient is optional. If not specified, the global *pedone*
cutoff is used.
----------
.. include:: accel_styles.rst
----------
Mixing, shift, table, tail correction, restart, rRESPA info
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
This pair style does not support mixing.
This pair style support the :doc:`pair_modify <pair_modify>` shift
option for the energy of the pair interaction.
This pair style does not support the :doc:`pair_modify <pair_modify>`
tail option for adding long-range tail corrections to energy and
pressure.
This pair style writes its information to :doc:`binary restart files <restart>`,
so pair_style and pair_coeff commands does not need to be specified in an input
script that reads a restart file.
This pair style can only be used via the *pair* keyword of the
:doc:`run_style respa <run_style>` command. It does not support the
*inner*, *middle*, or *outer* keywords.
----------
Restrictions
""""""""""""
The *pedone* pair style is only enabled if LAMMPS was built with the
EXTRA-PAIR package. See the :doc:`Build package <Build_package>` page
for more info.
Related commands
""""""""""""""""
:doc:`pair_coeff <pair_coeff>`, :doc:`pair_style <pair_style>`,
:doc:`pair style coul/long and coul/dsf <pair_coul>`,
:doc:`pair style morse <pair_morse>`
Default
"""""""
none
-------------
.. _Pedone:
**(Pedone)** A. Pedone, G. Malavasi, M. C. Menziani, A. N. Cormack, and U. Segre, J. Phys. Chem. B, 110, 11780 (2006)

View File

@ -275,30 +275,30 @@ accelerated styles exist.
* :doc:`lj/smooth/linear <pair_lj_smooth_linear>` - linear smoothed LJ potential
* :doc:`lj/switch3/coulgauss/long <pair_lj_switch3_coulgauss_long>` - smoothed LJ vdW potential with Gaussian electrostatics
* :doc:`lj96/cut <pair_lj96>` - Lennard-Jones 9/6 potential
* :doc:`local/density <pair_local_density>` - generalized basic local density potential
* :doc:`lubricate <pair_lubricate>` - hydrodynamic lubrication forces
* :doc:`lubricate/poly <pair_lubricate>` - hydrodynamic lubrication forces with polydispersity
* :doc:`lubricateU <pair_lubricateU>` - hydrodynamic lubrication forces for Fast Lubrication Dynamics
* :doc:`lubricateU/poly <pair_lubricateU>` - hydrodynamic lubrication forces for Fast Lubrication with polydispersity
* :doc:`local/density <pair_local_density>` - Generalized basic local density potential
* :doc:`lubricate <pair_lubricate>` - Hydrodynamic lubrication forces
* :doc:`lubricate/poly <pair_lubricate>` - Hydrodynamic lubrication forces with polydispersity
* :doc:`lubricateU <pair_lubricateU>` - Hydrodynamic lubrication forces for Fast Lubrication Dynamics
* :doc:`lubricateU/poly <pair_lubricateU>` - Hydrodynamic lubrication forces for Fast Lubrication with polydispersity
* :doc:`mdpd <pair_mesodpd>` - mDPD particle interactions
* :doc:`mdpd/rhosum <pair_mesodpd>` - mDPD particle interactions for mass density
* :doc:`meam <pair_meam>` - modified embedded atom method (MEAM)
* :doc:`meam/ms <pair_meam>` - multi-state modified embedded atom method (MS-MEAM)
* :doc:`meam/spline <pair_meam_spline>` - splined version of MEAM
* :doc:`meam/sw/spline <pair_meam_sw_spline>` - splined version of MEAM with a Stillinger-Weber term
* :doc:`mesocnt <pair_mesocnt>` - mesoscopic vdW potential for (carbon) nanotubes
* :doc:`mesocnt/viscous <pair_mesocnt>` - mesoscopic vdW potential for (carbon) nanotubes with friction
* :doc:`mgpt <pair_mgpt>` - simplified model generalized pseudopotential theory (MGPT) potential
* :doc:`meam <pair_meam>` - Modified embedded atom method (MEAM)
* :doc:`meam/ms <pair_meam>` - Multi-state modified embedded atom method (MS-MEAM)
* :doc:`meam/spline <pair_meam_spline>` - Splined version of MEAM
* :doc:`meam/sw/spline <pair_meam_sw_spline>` - Splined version of MEAM with a Stillinger-Weber term
* :doc:`mesocnt <pair_mesocnt>` - Mesoscopic vdW potential for (carbon) nanotubes
* :doc:`mesocnt/viscous <pair_mesocnt>` - Mesoscopic vdW potential for (carbon) nanotubes with friction
* :doc:`mgpt <pair_mgpt>` - Simplified model generalized pseudopotential theory (MGPT) potential
* :doc:`mie/cut <pair_mie>` - Mie potential
* :doc:`mm3/switch3/coulgauss/long <pair_lj_switch3_coulgauss_long>` - smoothed MM3 vdW potential with Gaussian electrostatics
* :doc:`mm3/switch3/coulgauss/long <pair_lj_switch3_coulgauss_long>` - Smoothed MM3 vdW potential with Gaussian electrostatics
* :doc:`momb <pair_momb>` - Many-Body Metal-Organic (MOMB) force field
* :doc:`morse <pair_morse>` - Morse potential
* :doc:`morse/smooth/linear <pair_morse>` - linear smoothed Morse potential
* :doc:`morse/smooth/linear <pair_morse>` - Linear smoothed Morse potential
* :doc:`morse/soft <pair_morse>` - Morse potential with a soft core
* :doc:`multi/lucy <pair_multi_lucy>` - DPD potential with density-dependent force
* :doc:`multi/lucy/rx <pair_multi_lucy_rx>` - reactive DPD potential with density-dependent force
* :doc:`nb3b/harmonic <pair_nb3b>` - non-bonded 3-body harmonic potential
* :doc:`nb3b/screened <pair_nb3b>` - non-bonded 3-body screened harmonic potential
* :doc:`nb3b/harmonic <pair_nb3b>` - Non-bonded 3-body harmonic potential
* :doc:`nb3b/screened <pair_nb3b>` - Non-bonded 3-body screened harmonic potential
* :doc:`nm/cut <pair_nm>` - N-M potential
* :doc:`nm/cut/coul/cut <pair_nm>` - N-M potential with cutoff Coulomb
* :doc:`nm/cut/coul/long <pair_nm>` - N-M potential with long-range Coulomb
@ -322,21 +322,22 @@ accelerated styles exist.
* :doc:`oxrna2/xstk <pair_oxrna2>` -
* :doc:`pace <pair_pace>` - Atomic Cluster Expansion (ACE) machine-learning potential
* :doc:`pace/extrapolation <pair_pace>` - Atomic Cluster Expansion (ACE) machine-learning potential with extrapolation grades
* :doc:`pedone <pair_pedone>` - Pedone (PMMCS) potential (non-Coulomb part)
* :doc:`pod <pair_pod>` - Proper orthogonal decomposition (POD) machine-learning potential
* :doc:`peri/eps <pair_peri>` - peridynamic EPS potential
* :doc:`peri/lps <pair_peri>` - peridynamic LPS potential
* :doc:`peri/pmb <pair_peri>` - peridynamic PMB potential
* :doc:`peri/ves <pair_peri>` - peridynamic VES potential
* :doc:`polymorphic <pair_polymorphic>` - polymorphic 3-body potential
* :doc:`peri/eps <pair_peri>` - Peridynamic EPS potential
* :doc:`peri/lps <pair_peri>` - Peridynamic LPS potential
* :doc:`peri/pmb <pair_peri>` - Peridynamic PMB potential
* :doc:`peri/ves <pair_peri>` - Peridynamic VES potential
* :doc:`polymorphic <pair_polymorphic>` - Polymorphic 3-body potential
* :doc:`python <pair_python>` -
* :doc:`quip <pair_quip>` -
* :doc:`rann <pair_rann>` -
* :doc:`reaxff <pair_reaxff>` - ReaxFF potential
* :doc:`rebo <pair_airebo>` - second generation REBO potential of Brenner
* :doc:`rebo <pair_airebo>` - Second generation REBO potential of Brenner
* :doc:`rebomos <pair_rebomos>` - REBOMoS potential for MoS2
* :doc:`resquared <pair_resquared>` - Everaers RE-Squared ellipsoidal potential
* :doc:`saip/metal <pair_saip_metal>` - interlayer potential for hetero-junctions formed with hexagonal 2D materials and metal surfaces
* :doc:`sdpd/taitwater/isothermal <pair_sdpd_taitwater_isothermal>` - smoothed dissipative particle dynamics for water at isothermal conditions
* :doc:`saip/metal <pair_saip_metal>` - Interlayer potential for hetero-junctions formed with hexagonal 2D materials and metal surfaces
* :doc:`sdpd/taitwater/isothermal <pair_sdpd_taitwater_isothermal>` - Smoothed dissipative particle dynamics for water at isothermal conditions
* :doc:`smatb <pair_smatb>` - Second Moment Approximation to the Tight Binding
* :doc:`smatb/single <pair_smatb>` - Second Moment Approximation to the Tight Binding for single-element systems
* :doc:`smd/hertz <pair_smd_hertz>` -

View File

@ -279,9 +279,9 @@ This means the variable can then be evaluated as many times as desired
and will return those values. There are two ways to cause the next
set of per-atom values from the file to be read: use the
:doc:`next <next>` command or the next() function in an atom-style
variable, as discussed below. Unlike most variable styles
atomfile-style variables are **deleted** during a :doc:`clear <clear>`
command.
variable, as discussed below. Unlike most variable styles, which
remain defined, atomfile-style variables are **deleted** during a
:doc:`clear <clear>` command.
The rules for formatting the file are as follows. Each time a set of
per-atom values is read, a non-blank line is searched for in the file.
@ -289,23 +289,37 @@ The file is read line by line but only up to 254 characters are used.
The rest are ignored. A comment character "#" can be used anywhere
on a line and all text following and the "#" character are ignored;
text starting with the comment character is stripped. Blank lines
are skipped. The first "word" of a non-blank line, delimited by
white-space, is read as the count N of per-atom lines to immediately
follow. N can be the total number of atoms in the system, or only a
subset. The next N lines have the following format
.. parsed-literal::
ID value
where ID is an atom ID and value is the per-atom numeric value that
will be assigned to that atom. IDs can be listed in any order.
are skipped. The first non-blank line is expected to contain a single
integer number as the count *N* of per-atom lines to follow. *N* can
be the total number of atoms in the system or less, indicating that data
for a subset is read. The next N lines must consist of two numbers,
the atom-ID of the atom for which a value is set followed by a floating
point number with the value. The atom-IDs may be listed in any order.
.. note::
Every time a set of per-atom lines is read, the value for all
atoms is first set to 0.0. Thus values for atoms whose ID does not
appear in the set, will remain 0.0.
Every time a set of per-atom lines is read, the value of the atomfile
variable for **all** atoms is first initialized to 0.0. Thus values
for atoms whose ID do not appear in the set in the file will remain
at 0.0.
Below is a small example for the atomfile variable file format:
.. parsed-literal::
# first set
4
# atom-ID value
3 1
4 -4
1 0.5
2 -0.5
# second set
2
2 1.0
4 -1.0
----------
@ -1174,12 +1188,17 @@ custom atom properties are the same; just replace the leading "i" with
+--------+---------------+------------------------------------------+
| equal | i_name[I] | element of per-atom vector (I = atom ID) |
+--------+---------------+------------------------------------------+
| equal | i2_name[I][J] | element of per-atom array (I = atom ID) |
+--------+---------------+------------------------------------------+
+--------+---------------+------------------------------------------+
| vector | i_name[I] | element of per-atom vector (I = atom ID) |
+--------+---------------+------------------------------------------+
| vector | i2_name[I][J] | element of per-atom array (I = atom ID) |
+--------+---------------+------------------------------------------+
+--------+---------------+------------------------------------------+
| atom | i_name | per-atom vector |
+--------+---------------+------------------------------------------+
| atom | i2_name[I] | column of per-atom array |
+--------+---------------+------------------------------------------+
@ -1222,15 +1241,23 @@ table:
+--------+------------+------------------------------------------+
| equal | c_ID | global scalar |
+--------+------------+------------------------------------------+
| equal | c_ID[I] | element of global vector |
+--------+------------+------------------------------------------+
| equal | c_ID[I][J] | element of global array |
+--------+------------+------------------------------------------+
| equal | C_ID[I] | element of per-atom vector (I = atom ID) |
+--------+------------+------------------------------------------+
| equal | C_ID[I][J] | element of per-atom array (I = atom ID) |
+--------+------------+------------------------------------------+
+--------+------------+------------------------------------------+
| vector | c_ID | global vector |
+--------+------------+------------------------------------------+
| vector | c_ID[I] | column of global array |
+--------+------------+------------------------------------------+
+--------+------------+------------------------------------------+
| atom | c_ID | per-atom vector |
+--------+------------+------------------------------------------+
| atom | c_ID[I] | column of per-atom array |
+--------+------------+------------------------------------------+
@ -1286,15 +1313,23 @@ and atom-style variables are listed in the following table:
+--------+------------+------------------------------------------+
| equal | f_ID | global scalar |
+--------+------------+------------------------------------------+
| equal | f_ID[I] | element of global vector |
+--------+------------+------------------------------------------+
| equal | f_ID[I][J] | element of global array |
+--------+------------+------------------------------------------+
| equal | F_ID[I] | element of per-atom vector (I = atom ID) |
+--------+------------+------------------------------------------+
| equal | F_ID[I][J] | element of per-atom array (I = atom ID) |
+--------+------------+------------------------------------------+
+--------+------------+------------------------------------------+
| vector | f_ID | global vector |
+--------+------------+------------------------------------------+
| vector | f_ID[I] | column of global array |
+--------+------------+------------------------------------------+
+--------+------------+------------------------------------------+
| atom | f_ID | per-atom vector |
+--------+------------+------------------------------------------+
| atom | f_ID[I] | column of per-atom array |
+--------+------------+------------------------------------------+
@ -1365,17 +1400,27 @@ per-atom vector.
+--------+-----------+-----------------------------------------------------------------------------------+
| equal | v_name | global scalar from an equal-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
| equal | v_name[I] | element of global vector from a vector-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
| equal | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
+--------+-----------+-----------------------------------------------------------------------------------+
| vector | v_name | global scalar from an equal-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
| vector | v_name | global vector from a vector-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
| vector | v_name[I] | element of global vector from a vector-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
| vector | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
+--------+-----------+-----------------------------------------------------------------------------------+
| atom | v_name | global scalar from an equal-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
| atom | v_name | per-atom vector from an atom-style or atomfile-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
| atom | v_name[I] | element of global vector from a vector-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
| atom | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+

View File

@ -2043,6 +2043,7 @@ Makefiles
makelist
makepkg
Makse
Malavasi
malloc
Malolepsza
Manby
@ -2152,6 +2153,7 @@ membered
memcheck
Mendelev
Menon
Menziani
mer
Meremianin
Mersenne
@ -2775,6 +2777,8 @@ Peachey
peachpuff
Pearlman
Pedersen
pedone
Pedone
peID
PEigenDense
Peng

View File

@ -0,0 +1,38 @@
# Ca-O melt with Pedone potential
units metal
atom_style charge
lattice fcc 4.8105 # experimental lattice parameter for fcc-lattice Ca cations
region box block 0 4 0 4 0 4
create_box 2 box
create_atoms 1 box
lattice fcc 4.8105 origin 0.5 0.5 0.5 # O anion lattice shifted by half a unit cell
create_atoms 2 box
mass 1 40.078
mass 2 15.999
set type 1 charge 1.2
set type 2 charge -1.2
timestep 0.002
neigh_modify delay 5 every 1 check yes
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
kspace_style pppm 1.0e-6
pair_coeff * * coul/long
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
velocity all create 6000.0 98347
fix 1 all nvt temp 3000.0 3000.0 0.1
# dump 1 all atom 500 Ca-O-melt.lammpstrj
thermo 100
run 1000

View File

@ -0,0 +1,38 @@
# Ca-O crystal with Pedone potential
units metal
atom_style charge
lattice fcc 4.8105 # experimental lattice parameter for fcc-lattice Ca cations
region box block 0 4 0 4 0 4
create_box 2 box
create_atoms 1 box
lattice fcc 4.8105 origin 0.5 0.5 0.5 # O anion lattice shifted by half a unit cell
create_atoms 2 box
mass 1 40.078
mass 2 15.999
displace_atoms all random 0.01 0.01 0.01 9084544
set type 1 charge 1.2
set type 2 charge -1.2
timestep 0.002
neigh_modify delay 5 every 1 check yes
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
kspace_style pppm 1.0e-6
pair_coeff * * coul/long
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
variable len equal lx*0.25
thermo_style custom step v_len lx pe press
thermo 100
fix 1 all box/relax iso 0.0
minimize 0.0 0.0 1000 10000
print "Expected lattice parameter: 4.7748, computed: $(v_len:%6.4f)"

View File

@ -0,0 +1,122 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-373-g7ac84e18dd)
using 1 OpenMP thread(s) per MPI task
# Ca-O melt with Pedone potential
units metal
atom_style charge
lattice fcc 4.8105 # experimental lattice parameter for fcc-lattice Ca cations
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
region box block 0 4 0 4 0 4
create_box 2 box
Created orthogonal box = (0 0 0) to (19.242 19.242 19.242)
1 by 1 by 1 MPI processor grid
create_atoms 1 box
Created 256 atoms
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
create_atoms CPU = 0.000 seconds
lattice fcc 4.8105 origin 0.5 0.5 0.5 # O anion lattice shifted by half a unit cell
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
create_atoms 2 box
Created 256 atoms
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
create_atoms CPU = 0.000 seconds
mass 1 40.078
mass 2 15.999
set type 1 charge 1.2
Setting atom values ...
256 settings made for charge
set type 2 charge -1.2
Setting atom values ...
256 settings made for charge
timestep 0.002
neigh_modify delay 5 every 1 check yes
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
kspace_style pppm 1.0e-6
pair_coeff * * coul/long
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
velocity all create 6000.0 98347
fix 1 all nvt temp 3000.0 3000.0 0.1
# dump 1 all atom 500 Ca-O-melt.lammpstrj
thermo 100
run 1000
PPPM initialization ...
using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
G vector (1/distance) = 0.23676226
grid = 24 24 24
stencil order = 5
estimated absolute RMS force accuracy = 1.3089053e-05
estimated relative force accuracy = 9.089844e-07
using double precision FFTW3
3d grid and FFT values/proc = 29791 13824
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 5 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 17
ghost atom cutoff = 17
binsize = 8.5, bins = 3 3 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair pedone, perpetual, skip from (2)
attributes: half, newton on
pair build: skip
stencil: none
bin: none
(2) pair coul/long, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 9.239 | 9.239 | 9.239 Mbytes
Step Temp E_pair E_mol TotEng Press
0 6000 -3771.5568 0 -3375.2452 34213.185
100 2894.1756 -3562.491 0 -3371.3251 114640.32
200 2980.3531 -3570.2657 0 -3373.4076 123673.56
300 2783.0437 -3574.5809 0 -3390.7554 119791.27
400 3021.6581 -3568.2149 0 -3368.6285 116032.29
500 3112.0438 -3580.0178 0 -3374.4613 114798.18
600 2973.4609 -3577.0582 0 -3380.6553 111843.46
700 3180.1687 -3568.4542 0 -3358.3979 121008.83
800 2923.7803 -3573.3023 0 -3380.181 111459.55
900 2940.3133 -3572.1322 0 -3377.9188 118177.36
1000 3070.2584 -3575.5655 0 -3372.769 114175.52
Loop time of 13.683 on 1 procs for 1000 steps with 512 atoms
Performance: 12.629 ns/day, 1.900 hours/ns, 73.084 timesteps/s, 37.419 katom-step/s
99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 11.545 | 11.545 | 11.545 | 0.0 | 84.37
Kspace | 1.4121 | 1.4121 | 1.4121 | 0.0 | 10.32
Neigh | 0.65265 | 0.65265 | 0.65265 | 0.0 | 4.77
Comm | 0.056036 | 0.056036 | 0.056036 | 0.0 | 0.41
Output | 0.00022945 | 0.00022945 | 0.00022945 | 0.0 | 0.00
Modify | 0.0090252 | 0.0090252 | 0.0090252 | 0.0 | 0.07
Other | | 0.00801 | | | 0.06
Nlocal: 512 ave 512 max 512 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 10901 ave 10901 max 10901 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 374419 ave 374419 max 374419 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 374419
Ave neighs/atom = 731.28711
Neighbor list builds = 71
Dangerous builds = 0
Total wall time: 0:00:13

View File

@ -0,0 +1,122 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-373-g7ac84e18dd)
using 1 OpenMP thread(s) per MPI task
# Ca-O melt with Pedone potential
units metal
atom_style charge
lattice fcc 4.8105 # experimental lattice parameter for fcc-lattice Ca cations
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
region box block 0 4 0 4 0 4
create_box 2 box
Created orthogonal box = (0 0 0) to (19.242 19.242 19.242)
1 by 2 by 2 MPI processor grid
create_atoms 1 box
Created 256 atoms
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
create_atoms CPU = 0.000 seconds
lattice fcc 4.8105 origin 0.5 0.5 0.5 # O anion lattice shifted by half a unit cell
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
create_atoms 2 box
Created 256 atoms
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
create_atoms CPU = 0.000 seconds
mass 1 40.078
mass 2 15.999
set type 1 charge 1.2
Setting atom values ...
256 settings made for charge
set type 2 charge -1.2
Setting atom values ...
256 settings made for charge
timestep 0.002
neigh_modify delay 5 every 1 check yes
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
kspace_style pppm 1.0e-6
pair_coeff * * coul/long
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
velocity all create 6000.0 98347
fix 1 all nvt temp 3000.0 3000.0 0.1
# dump 1 all atom 500 Ca-O-melt.lammpstrj
thermo 100
run 1000
PPPM initialization ...
using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
G vector (1/distance) = 0.23676226
grid = 24 24 24
stencil order = 5
estimated absolute RMS force accuracy = 1.3089053e-05
estimated relative force accuracy = 9.089844e-07
using double precision FFTW3
3d grid and FFT values/proc = 11191 3456
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 5 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 17
ghost atom cutoff = 17
binsize = 8.5, bins = 3 3 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair pedone, perpetual, skip from (2)
attributes: half, newton on
pair build: skip
stencil: none
bin: none
(2) pair coul/long, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 5.315 | 5.315 | 5.315 Mbytes
Step Temp E_pair E_mol TotEng Press
0 6000 -3771.5568 0 -3375.2452 34213.185
100 3050.0106 -3571.4712 0 -3370.0121 118480.04
200 3100.0073 -3571.2534 0 -3366.492 120618.37
300 2959.7127 -3580.0883 0 -3384.5935 109184.72
400 2922.7083 -3563.9803 0 -3370.9298 120165.71
500 3145.0439 -3571.3828 0 -3363.6465 115057.51
600 2741.7439 -3563.5077 0 -3382.4102 115504.31
700 2906.3636 -3567.3604 0 -3375.3895 119518.5
800 2995.3864 -3567.3838 0 -3369.5327 117975.22
900 2965.24 -3565.7983 0 -3369.9385 123362.35
1000 2916.6485 -3578.7471 0 -3386.0968 115624.78
Loop time of 4.50395 on 4 procs for 1000 steps with 512 atoms
Performance: 38.366 ns/day, 0.626 hours/ns, 222.028 timesteps/s, 113.678 katom-step/s
99.4% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 3.2703 | 3.2983 | 3.3259 | 1.3 | 73.23
Kspace | 0.79815 | 0.82633 | 0.85342 | 2.6 | 18.35
Neigh | 0.18328 | 0.18398 | 0.18472 | 0.1 | 4.08
Comm | 0.17423 | 0.17508 | 0.17592 | 0.2 | 3.89
Output | 0.00019336 | 0.0002167 | 0.00028554 | 0.0 | 0.00
Modify | 0.0089842 | 0.0091093 | 0.0092205 | 0.1 | 0.20
Other | | 0.01096 | | | 0.24
Nlocal: 128 ave 143 max 118 min
Histogram: 2 0 0 0 0 1 0 0 0 1
Nghost: 7622.75 ave 7651 max 7598 min
Histogram: 1 0 0 1 1 0 0 0 0 1
Neighs: 93581.8 ave 106456 max 84898 min
Histogram: 1 1 0 0 1 0 0 0 0 1
Total # of neighbors = 374327
Ave neighs/atom = 731.10742
Neighbor list builds = 71
Dangerous builds = 0
Total wall time: 0:00:04

View File

@ -0,0 +1,134 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-373-g7ac84e18dd)
using 1 OpenMP thread(s) per MPI task
# Ca-O crystal with Pedone potential
units metal
atom_style charge
lattice fcc 4.8105 # experimental lattice parameter for fcc-lattice Ca cations
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
region box block 0 4 0 4 0 4
create_box 2 box
Created orthogonal box = (0 0 0) to (19.242 19.242 19.242)
1 by 1 by 1 MPI processor grid
create_atoms 1 box
Created 256 atoms
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
create_atoms CPU = 0.000 seconds
lattice fcc 4.8105 origin 0.5 0.5 0.5 # O anion lattice shifted by half a unit cell
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
create_atoms 2 box
Created 256 atoms
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
create_atoms CPU = 0.000 seconds
mass 1 40.078
mass 2 15.999
displace_atoms all random 0.01 0.01 0.01 9084544
Displacing atoms ...
set type 1 charge 1.2
Setting atom values ...
256 settings made for charge
set type 2 charge -1.2
Setting atom values ...
256 settings made for charge
timestep 0.002
neigh_modify delay 5 every 1 check yes
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
kspace_style pppm 1.0e-6
pair_coeff * * coul/long
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
variable len equal lx*0.25
thermo_style custom step v_len lx pe press
thermo 100
fix 1 all box/relax iso 0.0
minimize 0.0 0.0 1000 10000
Switching to 'neigh_modify every 1 delay 0 check yes' setting during minimization
PPPM initialization ...
using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
G vector (1/distance) = 0.23676226
grid = 24 24 24
stencil order = 5
estimated absolute RMS force accuracy = 1.3089053e-05
estimated relative force accuracy = 9.089844e-07
using double precision FFTW3
3d grid and FFT values/proc = 29791 13824
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 17
ghost atom cutoff = 17
binsize = 8.5, bins = 3 3 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair pedone, perpetual, skip from (2)
attributes: half, newton on
pair build: skip
stencil: none
bin: none
(2) pair coul/long, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d
bin: standard
WARNING: Energy due to 1 extra global DOFs will be included in minimizer energies
(src/min.cpp:219)
Per MPI rank memory allocation (min/avg/max) = 10.33 | 10.33 | 10.33 Mbytes
Step v_len Lx PotEng Press
0 4.8105 19.242 -3765.9116 -21299.914
100 4.7797128 19.118851 -3767.814 -164.13101
200 4.7787507 19.115003 -3769.1366 -373.58797
300 4.7768265 19.107306 -3770.5634 48.944709
400 4.7768265 19.107306 -3770.9879 -258.56116
500 4.7758644 19.103458 -3771.3898 173.91894
600 4.7758644 19.103458 -3771.7586 -91.813678
700 4.7758644 19.103458 -3771.9842 -252.52883
800 4.7749023 19.099609 -3772.3526 216.83318
857 4.7747927 19.099171 -3772.8223 32.586251
Loop time of 18.0592 on 1 procs for 857 steps with 512 atoms
99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
Minimization stats:
Stopping criterion = linesearch alpha is zero
Energy initial, next-to-last, final =
-3765.91161156884 -3772.82226663623 -3772.82226663623
Force two-norm initial, final = 284.3967 0.46963871
Force max component initial, final = 284.14458 0.42827677
Final line search alpha, max atom move = 2.8580337e-08 1.2240294e-08
Iterations, force evaluations = 857 894
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 13.907 | 13.907 | 13.907 | 0.0 | 77.01
Kspace | 1.3809 | 1.3809 | 1.3809 | 0.0 | 7.65
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.045871 | 0.045871 | 0.045871 | 0.0 | 0.25
Output | 0.0002809 | 0.0002809 | 0.0002809 | 0.0 | 0.00
Modify | 0 | 0 | 0 | 0.0 | 0.00
Other | | 2.726 | | | 15.09
Nlocal: 512 ave 512 max 512 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 11655 ave 11655 max 11655 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 372155 ave 372155 max 372155 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 372155
Ave neighs/atom = 726.86523
Neighbor list builds = 0
Dangerous builds = 0
print "Expected lattice parameter: 4.7748, computed: $(v_len:%6.4f)"
Expected lattice parameter: 4.7748, computed: 4.7748
Total wall time: 0:00:18

View File

@ -0,0 +1,134 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-373-g7ac84e18dd)
using 1 OpenMP thread(s) per MPI task
# Ca-O crystal with Pedone potential
units metal
atom_style charge
lattice fcc 4.8105 # experimental lattice parameter for fcc-lattice Ca cations
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
region box block 0 4 0 4 0 4
create_box 2 box
Created orthogonal box = (0 0 0) to (19.242 19.242 19.242)
1 by 2 by 2 MPI processor grid
create_atoms 1 box
Created 256 atoms
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
create_atoms CPU = 0.000 seconds
lattice fcc 4.8105 origin 0.5 0.5 0.5 # O anion lattice shifted by half a unit cell
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
create_atoms 2 box
Created 256 atoms
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
create_atoms CPU = 0.000 seconds
mass 1 40.078
mass 2 15.999
displace_atoms all random 0.01 0.01 0.01 9084544
Displacing atoms ...
set type 1 charge 1.2
Setting atom values ...
256 settings made for charge
set type 2 charge -1.2
Setting atom values ...
256 settings made for charge
timestep 0.002
neigh_modify delay 5 every 1 check yes
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
kspace_style pppm 1.0e-6
pair_coeff * * coul/long
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
variable len equal lx*0.25
thermo_style custom step v_len lx pe press
thermo 100
fix 1 all box/relax iso 0.0
minimize 0.0 0.0 1000 10000
Switching to 'neigh_modify every 1 delay 0 check yes' setting during minimization
PPPM initialization ...
using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
G vector (1/distance) = 0.23676226
grid = 24 24 24
stencil order = 5
estimated absolute RMS force accuracy = 1.3089053e-05
estimated relative force accuracy = 9.089844e-07
using double precision FFTW3
3d grid and FFT values/proc = 11191 3456
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 17
ghost atom cutoff = 17
binsize = 8.5, bins = 3 3 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair pedone, perpetual, skip from (2)
attributes: half, newton on
pair build: skip
stencil: none
bin: none
(2) pair coul/long, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d
bin: standard
WARNING: Energy due to 1 extra global DOFs will be included in minimizer energies
(src/min.cpp:219)
Per MPI rank memory allocation (min/avg/max) = 6.44 | 6.44 | 6.44 Mbytes
Step v_len Lx PotEng Press
0 4.8105 19.242 -3765.9116 -21299.914
100 4.7797128 19.118851 -3767.814 -164.13101
200 4.7787507 19.115003 -3769.1367 -373.59489
300 4.7768265 19.107306 -3770.5868 32.046893
400 4.7768265 19.107306 -3771.0322 -290.69703
500 4.7758644 19.103458 -3771.4223 150.34606
600 4.7758644 19.103458 -3771.7941 -117.26938
700 4.7758644 19.103458 -3772.0193 -277.34372
800 4.7749023 19.099609 -3772.42 171.95177
860 4.7748339 19.099336 -3772.8237 1.0976356
Loop time of 5.65601 on 4 procs for 860 steps with 512 atoms
99.5% CPU use with 4 MPI tasks x 1 OpenMP threads
Minimization stats:
Stopping criterion = linesearch alpha is zero
Energy initial, next-to-last, final =
-3765.91161156888 -3772.82365446552 -3772.82365446552
Force two-norm initial, final = 284.3967 0.067746634
Force max component initial, final = 284.14458 0.014426328
Final line search alpha, max atom move = 1.9073486e-06 2.7516038e-08
Iterations, force evaluations = 860 922
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 3.7408 | 3.8442 | 4.0543 | 6.5 | 67.97
Kspace | 0.60187 | 0.81211 | 0.91543 | 14.1 | 14.36
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.14969 | 0.15017 | 0.15071 | 0.1 | 2.66
Output | 0.00019203 | 0.00020711 | 0.0002511 | 0.0 | 0.00
Modify | 0 | 0 | 0 | 0.0 | 0.00
Other | | 0.8494 | | | 15.02
Nlocal: 128 ave 135 max 123 min
Histogram: 1 0 1 0 1 0 0 0 0 1
Nghost: 8175 ave 8180 max 8168 min
Histogram: 1 0 0 0 0 1 0 1 0 1
Neighs: 93038.8 ave 98164 max 89373 min
Histogram: 1 0 1 0 1 0 0 0 0 1
Total # of neighbors = 372155
Ave neighs/atom = 726.86523
Neighbor list builds = 0
Dangerous builds = 0
print "Expected lattice parameter: 4.7748, computed: $(v_len:%6.4f)"
Expected lattice parameter: 4.7748, computed: 4.7748
Total wall time: 0:00:05

View File

@ -1,260 +0,0 @@
LAMMPS (29 Mar 2019)
using 1 OpenMP thread(s) per MPI task
# 2d micelle simulation
dimension 2
neighbor 0.3 bin
neigh_modify delay 5
atom_style bond
# Soft potential push-off
read_data data.micelle
orthogonal box = (0 0 -0.1) to (35.8569 35.8569 0.1)
1 by 1 by 1 MPI processor grid
reading atoms ...
1200 atoms
scanning bonds ...
1 = max bonds/atom
reading bonds ...
300 bonds
2 = max # of 1-2 neighbors
1 = max # of 1-3 neighbors
1 = max # of 1-4 neighbors
2 = max # of special neighbors
special bonds CPU = 0.000473022 secs
read_data CPU = 0.0024147 secs
special_bonds fene
2 = max # of 1-2 neighbors
2 = max # of special neighbors
special bonds CPU = 0.00022316 secs
pair_style soft 1.12246
pair_coeff * * 0.0 1.12246
bond_style harmonic
bond_coeff 1 50.0 0.75
velocity all create 0.45 2349852
variable prefactor equal ramp(1.0,20.0)
fix 1 all nve
fix 2 all temp/rescale 100 0.45 0.45 0.02 1.0
fix 3 all adapt 1 pair soft a * * v_prefactor
fix 4 all enforce2d
thermo 50
run 500
Neighbor list info ...
update every 1 steps, delay 5 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.42246
ghost atom cutoff = 1.42246
binsize = 0.71123, bins = 51 51 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair soft, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/2d/newton
bin: standard
Per MPI rank memory allocation (min/avg/max) = 3.799 | 3.799 | 3.799 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0.45 0.40003481 2.2200223e-06 0.84966203 0.78952518
50 0.54981866 0.93548899 0.068440043 1.5532895 1.9232786
100 0.45 0.99659327 0.079228519 1.5254468 3.2135679
150 0.86965411 0.90456016 0.07493355 1.8484231 4.3821925
200 0.45 1.01454 0.10663502 1.5708 4.7598476
250 0.79636561 0.82567712 0.12105337 1.7424325 5.4983899
300 0.45 0.86475538 0.11819875 1.4325791 5.8554758
350 0.72135464 0.70693069 0.10912636 1.5368106 6.0388247
400 0.45 0.75067331 0.14165013 1.3419484 6.3840708
450 0.64839221 0.62402486 0.14173679 1.4136135 6.4791009
500 0.45 0.66669513 0.13695201 1.2532721 6.807146
Loop time of 0.103162 on 1 procs for 500 steps with 1200 atoms
Performance: 2093802.885 tau/day, 4846.766 timesteps/s
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.068308 | 0.068308 | 0.068308 | 0.0 | 66.21
Bond | 0.004235 | 0.004235 | 0.004235 | 0.0 | 4.11
Neigh | 0.014069 | 0.014069 | 0.014069 | 0.0 | 13.64
Comm | 0.0019219 | 0.0019219 | 0.0019219 | 0.0 | 1.86
Output | 0.00017262 | 0.00017262 | 0.00017262 | 0.0 | 0.17
Modify | 0.011728 | 0.011728 | 0.011728 | 0.0 | 11.37
Other | | 0.002726 | | | 2.64
Nlocal: 1200 ave 1200 max 1200 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 197 ave 197 max 197 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 3094 ave 3094 max 3094 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 3094
Ave neighs/atom = 2.57833
Ave special neighs/atom = 0.5
Neighbor list builds = 52
Dangerous builds = 0
unfix 3
# Main run
pair_style lj/cut 2.5
# solvent/head - full-size and long-range
pair_coeff 1 1 1.0 1.0 2.5
pair_coeff 2 2 1.0 1.0 2.5
pair_coeff 1 2 1.0 1.0 2.5
# tail/tail - size-averaged and long-range
pair_coeff 3 3 1.0 0.75 2.5
pair_coeff 4 4 1.0 0.50 2.5
pair_coeff 3 4 1.0 0.67 2.5
# solvent/tail - full-size and repulsive
pair_coeff 1 3 1.0 1.0 1.12246
pair_coeff 1 4 1.0 1.0 1.12246
# head/tail - size-averaged and repulsive
pair_coeff 2 3 1.0 0.88 1.12246
pair_coeff 2 4 1.0 0.75 1.12246
thermo 50
#dump 1 all atom 2000 dump.micelle
#dump 2 all image 2000 image.*.jpg type type zoom 1.6
#dump_modify 2 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
#dump 3 all movie 2000 movie.mpg type type zoom 1.6
#dump_modify 3 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
reset_timestep 0
group solvent molecule 0
750 atoms in group solvent
group solute subtract all solvent
450 atoms in group solute
unfix 1
unfix 2
unfix 4
fix 1 solvent nve
fix 2 solvent temp/rescale 100 0.45 0.45 0.02 1.0
fix 5 solute rigid molecule langevin 0.45 0.45 0.5 112211
150 rigid bodies with 450 atoms
fix 4 all enforce2d
run 500
Neighbor list info ...
update every 1 steps, delay 5 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 2.8
ghost atom cutoff = 2.8
binsize = 1.4, bins = 26 26 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/2d/newton
bin: standard
Per MPI rank memory allocation (min/avg/max) = 5.274 | 5.274 | 5.274 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0.45318168 -1.3753652 0.13695201 -0.8705807 1.975423
50 0.77871641 -1.6955252 0.13695201 -0.92651507 0.64222539
100 0.5336062 -1.7124572 0.13695201 -1.1423948 -0.11959696
150 0.58789067 -1.7926109 0.13695201 -1.1784877 1.2592743
200 0.47864796 -1.8040298 0.13695201 -1.2785752 3.6739793
250 0.51124651 -1.8614797 0.13695201 -1.309566 2.5817722
300 0.45695639 -1.8708384 0.13695201 -1.3629901 3.0833794
350 0.477504 -1.8924359 0.13695201 -1.3679098 -5.1605926
400 0.45328205 -1.87754 0.13695201 -1.372674 -4.0355858
450 0.47465031 -1.9071924 0.13695201 -1.3849826 3.1949617
500 0.45533691 -1.9072316 0.13695201 -1.4006978 0.48079061
Loop time of 0.178806 on 1 procs for 500 steps with 1200 atoms
Performance: 1208012.705 tau/day, 2796.326 timesteps/s
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.086131 | 0.086131 | 0.086131 | 0.0 | 48.17
Bond | 0.0042472 | 0.0042472 | 0.0042472 | 0.0 | 2.38
Neigh | 0.021317 | 0.021317 | 0.021317 | 0.0 | 11.92
Comm | 0.0025985 | 0.0025985 | 0.0025985 | 0.0 | 1.45
Output | 0.000175 | 0.000175 | 0.000175 | 0.0 | 0.10
Modify | 0.061408 | 0.061408 | 0.061408 | 0.0 | 34.34
Other | | 0.00293 | | | 1.64
Nlocal: 1200 ave 1200 max 1200 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 416 ave 416 max 416 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 8769 ave 8769 max 8769 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 8769
Ave neighs/atom = 7.3075
Ave special neighs/atom = 0.5
Neighbor list builds = 47
Dangerous builds = 2
unfix 2
unfix 4
unfix 5
fix 5 solute rigid/small molecule
create bodies CPU = 0.00015378 secs
150 rigid bodies with 450 atoms
1.30435 = max distance from body owner to body atom
fix 4 all enforce2d
run 500
Per MPI rank memory allocation (min/avg/max) = 8.64 | 8.64 | 8.64 Mbytes
Step Temp E_pair E_mol TotEng Press
500 0.45533691 -1.9072316 0.13695201 -1.4006978 2.4545793
550 0.45627282 -1.912409 0.13695201 -1.4051155 2.1845065
600 0.44734553 -1.8890695 0.13695201 -1.389022 2.3458965
650 0.46444648 -1.9042462 0.13695201 -1.3903185 2.1609319
700 0.47113236 -1.8977576 0.13695201 -1.3784032 2.2420351
750 0.48554548 -1.9253545 0.13695201 -1.3943015 2.143907
800 0.46350091 -1.8865749 0.13695201 -1.3734146 2.294431
850 0.4766104 -1.9094039 0.13695201 -1.3856031 2.2077157
900 0.48988467 -1.9051538 0.13695201 -1.3705787 2.0107056
950 0.48351943 -1.9162485 0.13695201 -1.3868399 2.1891332
1000 0.49033701 -1.9115165 0.13695201 -1.3765742 2.1508141
Loop time of 0.166502 on 1 procs for 500 steps with 1200 atoms
Performance: 1297278.008 tau/day, 3002.958 timesteps/s
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.085767 | 0.085767 | 0.085767 | 0.0 | 51.51
Bond | 0.0042562 | 0.0042562 | 0.0042562 | 0.0 | 2.56
Neigh | 0.018039 | 0.018039 | 0.018039 | 0.0 | 10.83
Comm | 0.0024002 | 0.0024002 | 0.0024002 | 0.0 | 1.44
Output | 0.00018239 | 0.00018239 | 0.00018239 | 0.0 | 0.11
Modify | 0.052717 | 0.052717 | 0.052717 | 0.0 | 31.66
Other | | 0.003141 | | | 1.89
Nlocal: 1200 ave 1200 max 1200 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 415 ave 415 max 415 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 8743 ave 8743 max 8743 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 8743
Ave neighs/atom = 7.28583
Ave special neighs/atom = 0.5
Neighbor list builds = 40
Dangerous builds = 0
Total wall time: 0:00:00

View File

@ -1,260 +0,0 @@
LAMMPS (29 Mar 2019)
using 1 OpenMP thread(s) per MPI task
# 2d micelle simulation
dimension 2
neighbor 0.3 bin
neigh_modify delay 5
atom_style bond
# Soft potential push-off
read_data data.micelle
orthogonal box = (0 0 -0.1) to (35.8569 35.8569 0.1)
2 by 2 by 1 MPI processor grid
reading atoms ...
1200 atoms
scanning bonds ...
1 = max bonds/atom
reading bonds ...
300 bonds
2 = max # of 1-2 neighbors
1 = max # of 1-3 neighbors
1 = max # of 1-4 neighbors
2 = max # of special neighbors
special bonds CPU = 0.000422001 secs
read_data CPU = 0.00473404 secs
special_bonds fene
2 = max # of 1-2 neighbors
2 = max # of special neighbors
special bonds CPU = 0.000183344 secs
pair_style soft 1.12246
pair_coeff * * 0.0 1.12246
bond_style harmonic
bond_coeff 1 50.0 0.75
velocity all create 0.45 2349852
variable prefactor equal ramp(1.0,20.0)
fix 1 all nve
fix 2 all temp/rescale 100 0.45 0.45 0.02 1.0
fix 3 all adapt 1 pair soft a * * v_prefactor
fix 4 all enforce2d
thermo 50
run 500
Neighbor list info ...
update every 1 steps, delay 5 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.42246
ghost atom cutoff = 1.42246
binsize = 0.71123, bins = 51 51 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair soft, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/2d/newton
bin: standard
Per MPI rank memory allocation (min/avg/max) = 3.758 | 3.85 | 4.126 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0.45 0.40003481 2.2200223e-06 0.84966203 0.78952518
50 0.54981866 0.93548899 0.068440043 1.5532895 1.9232786
100 0.45 0.99659327 0.079228519 1.5254468 3.2135679
150 0.86965411 0.90456016 0.07493355 1.8484231 4.3821925
200 0.45 1.01454 0.10663502 1.5708 4.7598476
250 0.79636561 0.82567712 0.12105337 1.7424325 5.4983899
300 0.45 0.86475538 0.11819875 1.4325791 5.8554758
350 0.72135464 0.70693069 0.10912636 1.5368106 6.0388247
400 0.45 0.75067331 0.14165013 1.3419484 6.3840708
450 0.64839221 0.62402486 0.14173679 1.4136135 6.4791009
500 0.45 0.66669513 0.13695201 1.2532721 6.807146
Loop time of 0.0426326 on 4 procs for 500 steps with 1200 atoms
Performance: 5066547.720 tau/day, 11728.120 timesteps/s
98.7% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.016784 | 0.019254 | 0.022154 | 1.5 | 45.16
Bond | 0.0010612 | 0.0012558 | 0.0014153 | 0.4 | 2.95
Neigh | 0.0046048 | 0.0046697 | 0.0047245 | 0.1 | 10.95
Comm | 0.0064592 | 0.0097114 | 0.012527 | 2.4 | 22.78
Output | 0.00022507 | 0.00026393 | 0.00033951 | 0.0 | 0.62
Modify | 0.0041659 | 0.0048084 | 0.0053945 | 0.8 | 11.28
Other | | 0.002669 | | | 6.26
Nlocal: 300 ave 304 max 292 min
Histogram: 1 0 0 0 0 0 0 0 2 1
Nghost: 103.5 ave 108 max 98 min
Histogram: 1 0 0 1 0 0 0 0 0 2
Neighs: 773.5 ave 792 max 735 min
Histogram: 1 0 0 0 0 0 0 0 2 1
Total # of neighbors = 3094
Ave neighs/atom = 2.57833
Ave special neighs/atom = 0.5
Neighbor list builds = 52
Dangerous builds = 0
unfix 3
# Main run
pair_style lj/cut 2.5
# solvent/head - full-size and long-range
pair_coeff 1 1 1.0 1.0 2.5
pair_coeff 2 2 1.0 1.0 2.5
pair_coeff 1 2 1.0 1.0 2.5
# tail/tail - size-averaged and long-range
pair_coeff 3 3 1.0 0.75 2.5
pair_coeff 4 4 1.0 0.50 2.5
pair_coeff 3 4 1.0 0.67 2.5
# solvent/tail - full-size and repulsive
pair_coeff 1 3 1.0 1.0 1.12246
pair_coeff 1 4 1.0 1.0 1.12246
# head/tail - size-averaged and repulsive
pair_coeff 2 3 1.0 0.88 1.12246
pair_coeff 2 4 1.0 0.75 1.12246
thermo 50
#dump 1 all atom 2000 dump.micelle
#dump 2 all image 2000 image.*.jpg type type zoom 1.6
#dump_modify 2 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
#dump 3 all movie 2000 movie.mpg type type zoom 1.6
#dump_modify 3 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
reset_timestep 0
group solvent molecule 0
750 atoms in group solvent
group solute subtract all solvent
450 atoms in group solute
unfix 1
unfix 2
unfix 4
fix 1 solvent nve
fix 2 solvent temp/rescale 100 0.45 0.45 0.02 1.0
fix 5 solute rigid molecule langevin 0.45 0.45 0.5 112211
150 rigid bodies with 450 atoms
fix 4 all enforce2d
run 500
Neighbor list info ...
update every 1 steps, delay 5 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 2.8
ghost atom cutoff = 2.8
binsize = 1.4, bins = 26 26 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/2d/newton
bin: standard
Per MPI rank memory allocation (min/avg/max) = 5.251 | 5.282 | 5.374 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0.45318168 -1.3753652 0.13695201 -0.8705807 1.975423
50 0.77871641 -1.6955252 0.13695201 -0.92651507 0.64222539
100 0.5336062 -1.7124572 0.13695201 -1.1423948 -0.11959696
150 0.58789067 -1.7926109 0.13695201 -1.1784877 1.2592743
200 0.47864796 -1.8040298 0.13695201 -1.2785752 3.6739793
250 0.51124651 -1.8614797 0.13695201 -1.309566 2.5817722
300 0.45695639 -1.8708384 0.13695201 -1.3629901 3.0833794
350 0.477504 -1.8924359 0.13695201 -1.3679098 -5.1605926
400 0.45328205 -1.87754 0.13695201 -1.372674 -4.0355858
450 0.47465031 -1.9071924 0.13695201 -1.3849826 3.1949617
500 0.45533691 -1.9072316 0.13695201 -1.4006978 0.48079061
Loop time of 0.0887392 on 4 procs for 500 steps with 1200 atoms
Performance: 2434100.210 tau/day, 5634.491 timesteps/s
98.9% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.022611 | 0.022839 | 0.023082 | 0.1 | 25.74
Bond | 0.0010793 | 0.0011569 | 0.0012515 | 0.2 | 1.30
Neigh | 0.0064609 | 0.0064996 | 0.0065265 | 0.0 | 7.32
Comm | 0.0071712 | 0.0073687 | 0.0077734 | 0.3 | 8.30
Output | 0.00023389 | 0.00025356 | 0.00030327 | 0.0 | 0.29
Modify | 0.047258 | 0.047683 | 0.048503 | 0.2 | 53.73
Other | | 0.002938 | | | 3.31
Nlocal: 300 ave 309 max 291 min
Histogram: 1 0 0 1 0 0 1 0 0 1
Nghost: 218.75 ave 223 max 216 min
Histogram: 1 0 2 0 0 0 0 0 0 1
Neighs: 2192.25 ave 2251 max 2113 min
Histogram: 1 0 0 1 0 0 0 0 0 2
Total # of neighbors = 8769
Ave neighs/atom = 7.3075
Ave special neighs/atom = 0.5
Neighbor list builds = 47
Dangerous builds = 2
unfix 2
unfix 4
unfix 5
fix 5 solute rigid/small molecule
create bodies CPU = 7.70092e-05 secs
150 rigid bodies with 450 atoms
1.30435 = max distance from body owner to body atom
fix 4 all enforce2d
run 500
Per MPI rank memory allocation (min/avg/max) = 8.565 | 8.597 | 8.69 Mbytes
Step Temp E_pair E_mol TotEng Press
500 0.45533691 -1.9072316 0.13695201 -1.4006978 2.4545793
550 0.45627282 -1.912409 0.13695201 -1.4051155 2.1845065
600 0.44734553 -1.8890695 0.13695201 -1.389022 2.3458965
650 0.46444648 -1.9042462 0.13695201 -1.3903185 2.1609319
700 0.47113236 -1.8977576 0.13695201 -1.3784032 2.2420351
750 0.48554548 -1.9253545 0.13695201 -1.3943015 2.143907
800 0.46350091 -1.8865749 0.13695201 -1.3734146 2.294431
850 0.4766104 -1.9094039 0.13695201 -1.3856031 2.2077157
900 0.48988467 -1.9051538 0.13695201 -1.3705787 2.0107056
950 0.48351942 -1.9162485 0.13695201 -1.3868399 2.1891332
1000 0.490337 -1.9115164 0.13695201 -1.3765742 2.1508141
Loop time of 0.0588261 on 4 procs for 500 steps with 1200 atoms
Performance: 3671840.233 tau/day, 8499.630 timesteps/s
98.3% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.022407 | 0.022631 | 0.0229 | 0.1 | 38.47
Bond | 0.0010669 | 0.0011355 | 0.0012124 | 0.2 | 1.93
Neigh | 0.0052333 | 0.00528 | 0.0053182 | 0.0 | 8.98
Comm | 0.0063677 | 0.0066406 | 0.0068488 | 0.2 | 11.29
Output | 0.00023055 | 0.00024778 | 0.00028086 | 0.0 | 0.42
Modify | 0.020577 | 0.020651 | 0.020834 | 0.1 | 35.11
Other | | 0.00224 | | | 3.81
Nlocal: 300 ave 303 max 295 min
Histogram: 1 0 0 0 0 0 1 0 1 1
Nghost: 219 ave 224 max 215 min
Histogram: 1 0 0 1 1 0 0 0 0 1
Neighs: 2185.75 ave 2244 max 2143 min
Histogram: 1 1 0 0 0 1 0 0 0 1
Total # of neighbors = 8743
Ave neighs/atom = 7.28583
Ave special neighs/atom = 0.5
Neighbor list builds = 40
Dangerous builds = 0
Total wall time: 0:00:00

View File

@ -0,0 +1,271 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-345-g506bf886ee-modified)
# 2d micelle simulation
dimension 2
neighbor 0.3 bin
neigh_modify delay 5
atom_style bond
# Soft potential push-off
read_data data.micelle
Reading data file ...
orthogonal box = (0 0 -0.1) to (35.85686 35.85686 0.1)
1 by 1 by 1 MPI processor grid
reading atoms ...
1200 atoms
scanning bonds ...
1 = max bonds/atom
reading bonds ...
300 bonds
Finding 1-2 1-3 1-4 neighbors ...
special bond factors lj: 0 0 0
special bond factors coul: 0 0 0
2 = max # of 1-2 neighbors
1 = max # of 1-3 neighbors
1 = max # of 1-4 neighbors
2 = max # of special neighbors
special bonds CPU = 0.000 seconds
read_data CPU = 0.005 seconds
special_bonds fene
Finding 1-2 1-3 1-4 neighbors ...
special bond factors lj: 0 1 1
special bond factors coul: 0 1 1
2 = max # of 1-2 neighbors
2 = max # of special neighbors
special bonds CPU = 0.000 seconds
pair_style soft 1.12246
pair_coeff * * 0.0 1.12246
bond_style harmonic
bond_coeff 1 50.0 0.75
velocity all create 0.45 2349852
variable prefactor equal ramp(1.0,20.0)
fix 1 all nve
fix 2 all temp/rescale 100 0.45 0.45 0.02 1.0
fix 3 all adapt 1 pair soft a * * v_prefactor
fix 4 all enforce2d
thermo 50
run 500
Generated 0 of 6 mixed pair_coeff terms from geometric mixing rule
WARNING: Communication cutoff 1.42246 is shorter than a bond length based estimate of 1.425. This may lead to errors. (../comm.cpp:730)
Neighbor list info ...
update: every = 1 steps, delay = 5 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.42246
ghost atom cutoff = 1.42246
binsize = 0.71123, bins = 51 51 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair soft, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/2d
bin: standard
WARNING: Communication cutoff 1.42246 is shorter than a bond length based estimate of 1.425. This may lead to errors. (../comm.cpp:730)
Per MPI rank memory allocation (min/avg/max) = 4.148 | 4.148 | 4.148 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0.45 0.40003481 2.2200223e-06 0.84966203 0.78952518
50 0.54981866 0.93548899 0.068440043 1.5532895 1.9232786
100 0.45 0.99659327 0.079228519 1.5254468 3.2135679
150 0.86965411 0.90456016 0.07493355 1.8484231 4.3821925
200 0.45 1.01454 0.10663502 1.5708 4.7598476
250 0.79636561 0.82567712 0.12105337 1.7424325 5.4983899
300 0.45 0.86475538 0.11819875 1.4325791 5.8554758
350 0.72135464 0.70693069 0.10912636 1.5368106 6.0388247
400 0.45 0.75067331 0.14165013 1.3419484 6.3840708
450 0.64839221 0.62402486 0.14173679 1.4136135 6.4791009
500 0.45 0.66669513 0.13695201 1.2532721 6.807146
Loop time of 0.0365221 on 1 procs for 500 steps with 1200 atoms
Performance: 5914221.123 tau/day, 13690.327 timesteps/s, 16.428 Matom-step/s
89.2% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.022939 | 0.022939 | 0.022939 | 0.0 | 62.81
Bond | 0.00073851 | 0.00073851 | 0.00073851 | 0.0 | 2.02
Neigh | 0.0078339 | 0.0078339 | 0.0078339 | 0.0 | 21.45
Comm | 0.00072134 | 0.00072134 | 0.00072134 | 0.0 | 1.98
Output | 7.1419e-05 | 7.1419e-05 | 7.1419e-05 | 0.0 | 0.20
Modify | 0.0034868 | 0.0034868 | 0.0034868 | 0.0 | 9.55
Other | | 0.0007314 | | | 2.00
Nlocal: 1200 ave 1200 max 1200 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 197 ave 197 max 197 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 3094 ave 3094 max 3094 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 3094
Ave neighs/atom = 2.5783333
Ave special neighs/atom = 0.5
Neighbor list builds = 52
Dangerous builds = 0
unfix 3
# Main run
pair_style lj/cut 2.5
# solvent/head - full-size and long-range
pair_coeff 1 1 1.0 1.0 2.5
pair_coeff 2 2 1.0 1.0 2.5
pair_coeff 1 2 1.0 1.0 2.5
# tail/tail - size-averaged and long-range
pair_coeff 3 3 1.0 0.75 2.5
pair_coeff 4 4 1.0 0.50 2.5
pair_coeff 3 4 1.0 0.67 2.5
# solvent/tail - full-size and repulsive
pair_coeff 1 3 1.0 1.0 1.12246
pair_coeff 1 4 1.0 1.0 1.12246
# head/tail - size-averaged and repulsive
pair_coeff 2 3 1.0 0.88 1.12246
pair_coeff 2 4 1.0 0.75 1.12246
thermo 50
#dump 1 all atom 2000 dump.micelle
#dump 2 all image 2000 image.*.jpg type type zoom 1.6
#dump_modify 2 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
#dump 3 all movie 2000 movie.mpg type type zoom 1.6
#dump_modify 3 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
reset_timestep 0
group solvent molecule 0
750 atoms in group solvent
group solute subtract all solvent
450 atoms in group solute
unfix 1
unfix 2
unfix 4
fix 1 solvent nve
fix 2 solvent temp/rescale 100 0.45 0.45 0.02 1.0
fix 5 solute rigid molecule langevin 0.45 0.45 0.5 112211
150 rigid bodies with 450 atoms
fix 4 all enforce2d
run 500
Generated 0 of 6 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 5 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 2.8
ghost atom cutoff = 2.8
binsize = 1.4, bins = 26 26 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/2d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 5.391 | 5.391 | 5.391 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0.45318168 -1.3753652 0.13695201 -0.8705807 1.975423
50 0.77344732 -1.6944083 0.13695201 -0.92967487 0.58657109
100 0.53530681 -1.7006195 0.13695201 -1.1291768 0.11219772
150 0.60820175 -1.8071581 0.13695201 -1.176549 1.5161796
200 0.49410558 -1.7945459 0.13695201 -1.2565449 4.0469262
250 0.52460847 -1.8528672 0.13695201 -1.290108 2.9929445
300 0.46596803 -1.8680499 0.13695201 -1.3528872 2.7958851
350 0.48831812 -1.8723486 0.13695201 -1.3390451 -4.5106818
400 0.46798432 -1.9008529 0.13695201 -1.3840536 -4.3096566
450 0.46000658 -1.9081144 0.13695201 -1.3977904 3.3360611
500 0.45822409 -1.9077531 0.13695201 -1.3988759 0.45428738
Loop time of 0.0650638 on 1 procs for 500 steps with 1200 atoms
Performance: 3319817.322 tau/day, 7684.762 timesteps/s, 9.222 Matom-step/s
100.0% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.027565 | 0.027565 | 0.027565 | 0.0 | 42.37
Bond | 0.0007043 | 0.0007043 | 0.0007043 | 0.0 | 1.08
Neigh | 0.012724 | 0.012724 | 0.012724 | 0.0 | 19.56
Comm | 0.00091442 | 0.00091442 | 0.00091442 | 0.0 | 1.41
Output | 6.004e-05 | 6.004e-05 | 6.004e-05 | 0.0 | 0.09
Modify | 0.022329 | 0.022329 | 0.022329 | 0.0 | 34.32
Other | | 0.0007666 | | | 1.18
Nlocal: 1200 ave 1200 max 1200 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 411 ave 411 max 411 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 8759 ave 8759 max 8759 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 8759
Ave neighs/atom = 7.2991667
Ave special neighs/atom = 0.5
Neighbor list builds = 46
Dangerous builds = 2
unfix 2
unfix 4
unfix 5
fix 5 solute rigid/small molecule
create bodies CPU = 0.000 seconds
150 rigid bodies with 450 atoms
1.3043524 = max distance from body owner to body atom
fix 4 all enforce2d
run 500
Generated 0 of 6 mixed pair_coeff terms from geometric mixing rule
Per MPI rank memory allocation (min/avg/max) = 9.306 | 9.306 | 9.306 Mbytes
Step Temp E_pair E_mol TotEng Press
500 0.45822409 -1.9077531 0.13695201 -1.3988759 2.4509752
550 0.46736204 -1.9141964 0.13695201 -1.3979022 2.1695662
600 0.47872194 -1.9232781 0.13695201 -1.3977635 2.0058379
650 0.47491575 -1.9224109 0.13695201 -1.3999857 2.0637789
700 0.44714331 -1.8990682 0.13695201 -1.3991848 2.4863082
750 0.49089274 -1.9231004 0.13695201 -1.3877071 2.123147
800 0.4753839 -1.8959698 0.13695201 -1.3731645 2.3030481
850 0.46870816 -1.8972225 0.13695201 -1.3798357 2.2464703
900 0.49610454 -1.9070748 0.13695201 -1.3674513 2.2196388
950 0.4773035 -1.8925765 0.13695201 -1.3682132 2.3534786
1000 0.50413702 -1.9292393 0.13695201 -1.383096 2.1630988
Loop time of 0.0592806 on 1 procs for 500 steps with 1200 atoms
Performance: 3643690.276 tau/day, 8434.468 timesteps/s, 10.121 Matom-step/s
100.0% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.026866 | 0.026866 | 0.026866 | 0.0 | 45.32
Bond | 0.00071863 | 0.00071863 | 0.00071863 | 0.0 | 1.21
Neigh | 0.010927 | 0.010927 | 0.010927 | 0.0 | 18.43
Comm | 0.00084187 | 0.00084187 | 0.00084187 | 0.0 | 1.42
Output | 6.8106e-05 | 6.8106e-05 | 6.8106e-05 | 0.0 | 0.11
Modify | 0.019075 | 0.019075 | 0.019075 | 0.0 | 32.18
Other | | 0.000783 | | | 1.32
Nlocal: 1200 ave 1200 max 1200 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 417 ave 417 max 417 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 8654 ave 8654 max 8654 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 8654
Ave neighs/atom = 7.2116667
Ave special neighs/atom = 0.5
Neighbor list builds = 39
Dangerous builds = 0
Total wall time: 0:00:00

View File

@ -0,0 +1,272 @@
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-345-g506bf886ee-modified)
WARNING: Using I/O redirection is unreliable with parallel runs. Better to use the -in switch to read input files. (../lammps.cpp:551)
# 2d micelle simulation
dimension 2
neighbor 0.3 bin
neigh_modify delay 5
atom_style bond
# Soft potential push-off
read_data data.micelle
Reading data file ...
orthogonal box = (0 0 -0.1) to (35.85686 35.85686 0.1)
2 by 2 by 1 MPI processor grid
reading atoms ...
1200 atoms
scanning bonds ...
1 = max bonds/atom
reading bonds ...
300 bonds
Finding 1-2 1-3 1-4 neighbors ...
special bond factors lj: 0 0 0
special bond factors coul: 0 0 0
2 = max # of 1-2 neighbors
1 = max # of 1-3 neighbors
1 = max # of 1-4 neighbors
2 = max # of special neighbors
special bonds CPU = 0.000 seconds
read_data CPU = 0.004 seconds
special_bonds fene
Finding 1-2 1-3 1-4 neighbors ...
special bond factors lj: 0 1 1
special bond factors coul: 0 1 1
2 = max # of 1-2 neighbors
2 = max # of special neighbors
special bonds CPU = 0.000 seconds
pair_style soft 1.12246
pair_coeff * * 0.0 1.12246
bond_style harmonic
bond_coeff 1 50.0 0.75
velocity all create 0.45 2349852
variable prefactor equal ramp(1.0,20.0)
fix 1 all nve
fix 2 all temp/rescale 100 0.45 0.45 0.02 1.0
fix 3 all adapt 1 pair soft a * * v_prefactor
fix 4 all enforce2d
thermo 50
run 500
Generated 0 of 6 mixed pair_coeff terms from geometric mixing rule
WARNING: Communication cutoff 1.42246 is shorter than a bond length based estimate of 1.425. This may lead to errors. (../comm.cpp:730)
Neighbor list info ...
update: every = 1 steps, delay = 5 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.42246
ghost atom cutoff = 1.42246
binsize = 0.71123, bins = 51 51 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair soft, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/2d
bin: standard
WARNING: Communication cutoff 1.42246 is shorter than a bond length based estimate of 1.425. This may lead to errors. (../comm.cpp:730)
Per MPI rank memory allocation (min/avg/max) = 4.126 | 4.126 | 4.127 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0.45 0.40003481 2.2200223e-06 0.84966203 0.78952518
50 0.54981866 0.93548899 0.068440043 1.5532895 1.9232786
100 0.45 0.99659327 0.079228519 1.5254468 3.2135679
150 0.86965411 0.90456016 0.07493355 1.8484231 4.3821925
200 0.45 1.01454 0.10663502 1.5708 4.7598476
250 0.79636561 0.82567712 0.12105337 1.7424325 5.4983899
300 0.45 0.86475538 0.11819875 1.4325791 5.8554758
350 0.72135464 0.70693069 0.10912636 1.5368106 6.0388247
400 0.45 0.75067331 0.14165013 1.3419484 6.3840708
450 0.64839221 0.62402486 0.14173679 1.4136135 6.4791009
500 0.45 0.66669513 0.13695201 1.2532721 6.807146
Loop time of 0.0138659 on 4 procs for 500 steps with 1200 atoms
Performance: 15577811.312 tau/day, 36059.748 timesteps/s, 43.272 Matom-step/s
99.9% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.0053896 | 0.0057144 | 0.0060899 | 0.4 | 41.21
Bond | 0.00020074 | 0.00021422 | 0.00022291 | 0.0 | 1.54
Neigh | 0.0025301 | 0.0025401 | 0.0025501 | 0.0 | 18.32
Comm | 0.0031194 | 0.0035074 | 0.0038196 | 0.4 | 25.30
Output | 6.4137e-05 | 6.7743e-05 | 7.7909e-05 | 0.0 | 0.49
Modify | 0.0013391 | 0.0013582 | 0.0013972 | 0.1 | 9.80
Other | | 0.0004638 | | | 3.34
Nlocal: 300 ave 304 max 292 min
Histogram: 1 0 0 0 0 0 0 0 2 1
Nghost: 103.5 ave 108 max 98 min
Histogram: 1 0 0 1 0 0 0 0 0 2
Neighs: 773.5 ave 792 max 735 min
Histogram: 1 0 0 0 0 0 0 0 2 1
Total # of neighbors = 3094
Ave neighs/atom = 2.5783333
Ave special neighs/atom = 0.5
Neighbor list builds = 52
Dangerous builds = 0
unfix 3
# Main run
pair_style lj/cut 2.5
# solvent/head - full-size and long-range
pair_coeff 1 1 1.0 1.0 2.5
pair_coeff 2 2 1.0 1.0 2.5
pair_coeff 1 2 1.0 1.0 2.5
# tail/tail - size-averaged and long-range
pair_coeff 3 3 1.0 0.75 2.5
pair_coeff 4 4 1.0 0.50 2.5
pair_coeff 3 4 1.0 0.67 2.5
# solvent/tail - full-size and repulsive
pair_coeff 1 3 1.0 1.0 1.12246
pair_coeff 1 4 1.0 1.0 1.12246
# head/tail - size-averaged and repulsive
pair_coeff 2 3 1.0 0.88 1.12246
pair_coeff 2 4 1.0 0.75 1.12246
thermo 50
#dump 1 all atom 2000 dump.micelle
#dump 2 all image 2000 image.*.jpg type type zoom 1.6
#dump_modify 2 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
#dump 3 all movie 2000 movie.mpg type type zoom 1.6
#dump_modify 3 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
reset_timestep 0
group solvent molecule 0
750 atoms in group solvent
group solute subtract all solvent
450 atoms in group solute
unfix 1
unfix 2
unfix 4
fix 1 solvent nve
fix 2 solvent temp/rescale 100 0.45 0.45 0.02 1.0
fix 5 solute rigid molecule langevin 0.45 0.45 0.5 112211
150 rigid bodies with 450 atoms
fix 4 all enforce2d
run 500
Generated 0 of 6 mixed pair_coeff terms from geometric mixing rule
Neighbor list info ...
update: every = 1 steps, delay = 5 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 2.8
ghost atom cutoff = 2.8
binsize = 1.4, bins = 26 26 1
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair lj/cut, perpetual
attributes: half, newton on
pair build: half/bin/newton
stencil: half/bin/2d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 5.375 | 5.375 | 5.375 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0.45318168 -1.3753652 0.13695201 -0.8705807 1.975423
50 0.77344732 -1.6944083 0.13695201 -0.92967487 0.58657109
100 0.53530681 -1.7006195 0.13695201 -1.1291768 0.11219772
150 0.60820175 -1.8071581 0.13695201 -1.176549 1.5161796
200 0.49410558 -1.7945459 0.13695201 -1.2565449 4.0469262
250 0.52460847 -1.8528672 0.13695201 -1.290108 2.9929445
300 0.46596803 -1.8680499 0.13695201 -1.3528872 2.7958851
350 0.48831812 -1.8723486 0.13695201 -1.3390451 -4.5106818
400 0.46798432 -1.9008529 0.13695201 -1.3840536 -4.3096566
450 0.46000658 -1.9081144 0.13695201 -1.3977904 3.3360611
500 0.45822409 -1.9077531 0.13695201 -1.3988759 0.45428738
Loop time of 0.0381773 on 4 procs for 500 steps with 1200 atoms
Performance: 5657810.772 tau/day, 13096.784 timesteps/s, 15.716 Matom-step/s
99.6% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.0059651 | 0.0062314 | 0.0066404 | 0.3 | 16.32
Bond | 0.00021057 | 0.00022477 | 0.0002333 | 0.0 | 0.59
Neigh | 0.0041424 | 0.0041487 | 0.0041512 | 0.0 | 10.87
Comm | 0.004264 | 0.0047244 | 0.0050297 | 0.4 | 12.37
Output | 8.2396e-05 | 8.6559e-05 | 9.6749e-05 | 0.0 | 0.23
Modify | 0.021833 | 0.021946 | 0.022094 | 0.1 | 57.48
Other | | 0.0008157 | | | 2.14
Nlocal: 300 ave 303 max 296 min
Histogram: 1 0 0 0 1 0 0 0 1 1
Nghost: 216.25 ave 219 max 214 min
Histogram: 1 0 1 0 0 0 1 0 0 1
Neighs: 2189.75 ave 2205 max 2173 min
Histogram: 1 0 0 0 1 0 1 0 0 1
Total # of neighbors = 8759
Ave neighs/atom = 7.2991667
Ave special neighs/atom = 0.5
Neighbor list builds = 46
Dangerous builds = 2
unfix 2
unfix 4
unfix 5
fix 5 solute rigid/small molecule
create bodies CPU = 0.000 seconds
150 rigid bodies with 450 atoms
1.3043524 = max distance from body owner to body atom
fix 4 all enforce2d
run 500
Generated 0 of 6 mixed pair_coeff terms from geometric mixing rule
Per MPI rank memory allocation (min/avg/max) = 9.233 | 9.233 | 9.234 Mbytes
Step Temp E_pair E_mol TotEng Press
500 0.45822409 -1.9077531 0.13695201 -1.3988759 2.4509752
550 0.46736204 -1.9141964 0.13695201 -1.3979022 2.1695662
600 0.47872194 -1.9232781 0.13695201 -1.3977635 2.0058379
650 0.47491575 -1.9224109 0.13695201 -1.3999857 2.0637789
700 0.44714331 -1.8990682 0.13695201 -1.3991848 2.4863082
750 0.49089274 -1.9231004 0.13695201 -1.3877071 2.123147
800 0.4753839 -1.8959698 0.13695201 -1.3731645 2.3030481
850 0.46870816 -1.8972225 0.13695201 -1.3798357 2.2464703
900 0.49610454 -1.9070748 0.13695201 -1.3674513 2.2196388
950 0.4773035 -1.8925765 0.13695201 -1.3682132 2.3534786
1000 0.50413702 -1.9292393 0.13695201 -1.383096 2.1630987
Loop time of 0.0236819 on 4 procs for 500 steps with 1200 atoms
Performance: 9120883.727 tau/day, 21113.157 timesteps/s, 25.336 Matom-step/s
99.9% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.0058656 | 0.0059718 | 0.0061355 | 0.1 | 25.22
Bond | 0.0002083 | 0.00022447 | 0.00023485 | 0.0 | 0.95
Neigh | 0.0035477 | 0.0035644 | 0.0035824 | 0.0 | 15.05
Comm | 0.0041037 | 0.0042227 | 0.0043024 | 0.1 | 17.83
Output | 7.4355e-05 | 7.8273e-05 | 8.7777e-05 | 0.0 | 0.33
Modify | 0.008976 | 0.0090549 | 0.0091663 | 0.1 | 38.24
Other | | 0.0005654 | | | 2.39
Nlocal: 300 ave 306 max 295 min
Histogram: 1 0 1 0 0 1 0 0 0 1
Nghost: 221 ave 226 max 217 min
Histogram: 1 0 0 1 1 0 0 0 0 1
Neighs: 2163.5 ave 2271 max 2100 min
Histogram: 1 1 0 1 0 0 0 0 0 1
Total # of neighbors = 8654
Ave neighs/atom = 7.2116667
Ave special neighs/atom = 0.5
Neighbor list builds = 39
Dangerous builds = 0
Total wall time: 0:00:00

View File

@ -33,6 +33,7 @@
//#define ASYNC_DEVICE_COPY
#if 0
#if !defined(USE_OPENCL) && !defined(USE_HIP)
// temporary workaround for int2 also defined in cufft
#ifdef int2
@ -40,6 +41,7 @@
#endif
#include "cufft.h"
#endif
#endif
namespace LAMMPS_AL {
@ -313,10 +315,11 @@ class BaseAmoeba {
virtual int fphi_mpole();
virtual int polar_real(const int eflag, const int vflag) = 0;
#if 0
#if !defined(USE_OPENCL) && !defined(USE_HIP)
cufftHandle plan;
#endif
#endif
bool fft_plan_created;
};

View File

@ -1,5 +1,105 @@
# CHANGELOG
## [4.3.00](https://github.com/kokkos/kokkos/tree/4.3.00) (2024-03-19)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.01...4.3.00)
### Features:
* Add `Experimental::sort_by_key(exec, keys, values)` algorithm [\#6801](https://github.com/kokkos/kokkos/pull/6801)
### Backend and Architecture Enhancements:
#### CUDA:
* Experimental multi-GPU support (from the same process) [\#6782](https://github.com/kokkos/kokkos/pull/6782)
* Link against CUDA libraries even with KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE [\#6701](https://github.com/kokkos/kokkos/pull/6701)
* Don't use the compiler launcher script if the CMake compile language is CUDA. [\#6704](https://github.com/kokkos/kokkos/pull/6704)
* nvcc(wrapper): adding "long" and "short" versions for all flags [\#6615](https://github.com/kokkos/kokkos/pull/6615)
#### HIP:
* Fix compilation when using amdclang (with ROCm >= 5.7) and RDC [\#6857](https://github.com/kokkos/kokkos/pull/6857)
* Use rocthrust for sorting, when available [\#6793](https://github.com/kokkos/kokkos/pull/6793)
#### SYCL:
* We only support OneAPI SYCL implementation: add check during initialization
* Error out on initialization if the backend is different from `ext_oneapi_*` [\#6784](https://github.com/kokkos/kokkos/pull/6784)
* Filter GPU devices for `ext_onapi_*` GPU devices [\#6758](https://github.com/kokkos/kokkos/pull/6784)
* Performance Improvements
* Avoid unnecessary zero-memset of the scratch flags in SYCL [\#6739](https://github.com/kokkos/kokkos/pull/6739)
* Use host-pinned memory to copy reduction/scan result [\#6500](https://github.com/kokkos/kokkos/pull/6500)
* Address deprecations after oneAPI 2023.2.0 [\#6577](https://github.com/kokkos/kokkos/pull/6739)
* Make sure to call find_dependency for oneDPL if necessary [\#6870](https://github.com/kokkos/kokkos/pull/6870)
#### OpenMPTarget:
* Use LLVM extensions for dynamic shared memory [\#6380](https://github.com/kokkos/kokkos/pull/6380)
* Guard scratch memory usage in ParallelReduce [\#6585 ](https://github.com/kokkos/kokkos/pull/6585)
* Update linker flags for Intel GPUs update [\#6735](https://github.com/kokkos/kokkos/pull/6735)
* Improve handling of printf on Intel GPUs [\#6652](https://github.com/kokkos/kokkos/pull/6652)
#### OpenACC:
* Add atomics support [\#6446](https://github.com/kokkos/kokkos/pull/6446)
* Make the OpenACC backend asynchronous [\#6772](https://github.com/kokkos/kokkos/pull/6772)
#### Threads:
* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6446)
#### OpenMP:
* Improve performance of view initializations and filling with zeros [\#6573](https://github.com/kokkos/kokkos/pull/6573)
### General Enhancements
* Improve performance of random number generation when using a normal distribution on GPUs [\#6556](https://github.com/kokkos/kokkos/pull/6556)
* Allocate temporary view with the user-provided execution space instance and do not initialize in `unique` algorithm [\#6598](https://github.com/kokkos/kokkos/pull/6598)
* Add deduction guide for `Kokkos::Array` [\#6373](https://github.com/kokkos/kokkos/pull/6373)
* Provide new public headers `<Kokkos_Clamp.hpp>` and `<Kokkos_MinMax.hpp>` [\#6687](https://github.com/kokkos/kokkos/pull/6687)
* Fix/improvement to `remove_if` parallel algorithm: use the provided execution space instance for temporary allocations and drop unnecessaryinitialization + avoid evaluating twice the predicate during final pass [\#6747](https://github.com/kokkos/kokkos/pull/6747)
* Add runtime function to query the number of devices and make device ID consistent with `KOKKOS_VISIBLE_DEVICES` [\#6713](https://github.com/kokkos/kokkos/pull/6713)
* simd: support `vector_aligned_tag` [\#6243](https://github.com/kokkos/kokkos/pull/6243)
* Avoid unnecessary allocation when default constructing Bitset [\#6524](https://github.com/kokkos/kokkos/pull/6524)
* Fix constness for views in std algorithms [\#6813](https://github.com/kokkos/kokkos/pull/6813)
* Improve error message on unsafe implicit conversion in MDRangePolicy [\#6855](https://github.com/kokkos/kokkos/pull/6855)
* CTAD (deduction guides) for RangePolicy [\#6850](https://github.com/kokkos/kokkos/pull/6850)
* CTAD (deduction guides) for MDRangePolicy [\#5516](https://github.com/kokkos/kokkos/pull/5516)
### Build System Changes
* Require `Kokkos_ENABLE_ATOMICS_BYPASS` option to bypass atomic operation for Serial backend only builds [\#6692](https://github.com/kokkos/kokkos/pull/6692)
* Add support for RISCV and the Milk-V's Pioneer [\#6773](https://github.com/kokkos/kokkos/pull/6773)
* Add C++26 standard to CMake setup [\#6733](https://github.com/kokkos/kokkos/pull/6733)
* Fix Makefile when using gnu_generate_makefile.sh and make >= 4.3 [\#6606](https://github.com/kokkos/kokkos/pull/6606)
* Cuda: Fix configuring with CMake >= 3.28.4 - temporary fallback to internal CudaToolkit.cmake [\#6898](https://github.com/kokkos/kokkos/pull/6898)
### Incompatibilities (i.e. breaking changes)
* Remove all `DEPRECATED_CODE_3` option and all code that was guarded by it [\#6523](https://github.com/kokkos/kokkos/pull/6523)
* Drop guards to accommodate external code defining `KOKKOS_ASSERT` [\#6665](https://github.com/kokkos/kokkos/pull/6665)
* `Profiling::ProfilingSection(std::string)` constructor marked explicit and nodiscard [\#6690](https://github.com/kokkos/kokkos/pull/6690)
* Add bound check preconditions for `RangePolicy` and `MDRangePolicy` [\#6617](https://github.com/kokkos/kokkos/pull/6617) [\#6726](https://github.com/kokkos/kokkos/pull/6726)
* Add checks for unsafe implicit conversions in RangePolicy [\#6754](https://github.com/kokkos/kokkos/pull/6754)
* Remove Kokkos::[b]half_t volatile overloads [\#6579](https://github.com/kokkos/kokkos/pull/6579)
* Remove KOKKOS_IMPL_DO_NOT_USE_PRINTF [\#6593](https://github.com/kokkos/kokkos/pull/6593)
* Check matching static extents in View constructor [\#5190 ](https://github.com/kokkos/kokkos/pull/5190)
* Tools(profiling): fix typo Kokkos_Tools_Optim[i]zationGoal [\#6642](https://github.com/kokkos/kokkos/pull/6642)
* Remove variadic range policy constructor (disallow passing multiple trailing chunk size arguments) [\#6845](https://github.com/kokkos/kokkos/pull/6845)
* Improve message on view out of bounds access and always abort [\#6861](https://github.com/kokkos/kokkos/pull/6861)
* Drop `KOKKOS_ENABLE_INTEL_MM_ALLOC` macro [\#6797](https://github.com/kokkos/kokkos/pull/6797)
* Remove `Kokkos::Experimental::LogicalMemorySpace` (without going through deprecation) [\#6557](https://github.com/kokkos/kokkos/pull/6557)
* Remove `Experimental::HBWSpace` and support for linking against memkind [\#6791](https://github.com/kokkos/kokkos/pull/6791)
* Drop librt TPL and associated `KOKKOS_ENABLE_LIBRT` macro [\#6798](https://github.com/kokkos/kokkos/pull/6798)
* Drop support for old CPU architectures (`ARCH_BGQ`, `ARCH_POWER7`, `ARCH_WSM` and associated `ARCH_SSE4` macro) [\#6806](https://github.com/kokkos/kokkos/pull/6806)
* Drop support for deprecated command-line arguments and environment variables [\#6744](https://github.com/kokkos/kokkos/pull/6744)
### Deprecations
* Provide kokkos_swap as part of Core and deprecate Experimental::swap in Algorithms [\#6697](https://github.com/kokkos/kokkos/pull/6697)
* Deprecate {Cuda,HIP}::detect_device_count() and Cuda::[detect_]device_arch() [\#6710](https://github.com/kokkos/kokkos/pull/6710)
* Deprecate `ExecutionSpace::in_parallel()` [\#6582](https://github.com/kokkos/kokkos/pull/6582)
### Bug Fixes
* Fix team-level MDRange reductions: [\#6511](https://github.com/kokkos/kokkos/pull/6511)
* Fix CUDA and SYCL small value type (16-bit) team reductions [\#5334](https://github.com/kokkos/kokkos/pull/5334)
* Enable `{transform_}exclusive_scan` in place [\#6667](https://github.com/kokkos/kokkos/pull/6667)
* `fill_random` overload that do not take an execution space instance argument should fence [\#6658](https://github.com/kokkos/kokkos/pull/6658)
* HIP,Cuda,OpenMPTarget: Fixup use provided execution space when copying host inaccessible reduction result [\#6777](https://github.com/kokkos/kokkos/pull/6777)
* Fix typo in `cuda_func_set_attribute[s]_wrapper` preventing proper setting of desired occupancy [\#6786](https://github.com/kokkos/kokkos/pull/6786)
* Avoid undefined behavior due to conversion between signed and unsigned integers in shift_{right, left}_team_impl [\#6821](https://github.com/kokkos/kokkos/pull/6821)
* Fix a bug in Makefile.kokkos when using AMD GPU architectures as `AMD_GFXYYY` [\#6892](https://github.com/kokkos/kokkos/pull/6892)
## [4.2.01](https://github.com/kokkos/kokkos/tree/4.2.01) (2023-12-07)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.00...4.2.01)
@ -999,95 +1099,95 @@
- Major update for OpenMPTarget: many capabilities now work. For details contact us.
- Added DPC++/SYCL backend: primary capabilites are working.
- Added Kokkos Graph API analogous to CUDA Graphs.
- Added parallel_scan support with TeamThreadRange [\#3536](https://github.com/kokkos/kokkos/pull/#3536)
- Added Logical Memory Spaces [\#3546](https://github.com/kokkos/kokkos/pull/#3546)
- Added initial half precision support [\#3439](https://github.com/kokkos/kokkos/pull/#3439)
- Experimental feature: control cuda occupancy [\#3379](https://github.com/kokkos/kokkos/pull/#3379)
- Added parallel_scan support with TeamThreadRange [\#3536](https://github.com/kokkos/kokkos/pull/3536)
- Added Logical Memory Spaces [\#3546](https://github.com/kokkos/kokkos/pull/3546)
- Added initial half precision support [\#3439](https://github.com/kokkos/kokkos/pull/3439)
- Experimental feature: control cuda occupancy [\#3379](https://github.com/kokkos/kokkos/pull/3379)
**Implemented enhancements Backends and Archs:**
- Add a64fx and fujitsu Compiler support [\#3614](https://github.com/kokkos/kokkos/pull/#3614)
- Adding support for AMD gfx908 archictecture [\#3375](https://github.com/kokkos/kokkos/pull/#3375)
- SYCL parallel\_for MDRangePolicy [\#3583](https://github.com/kokkos/kokkos/pull/#3583)
- SYCL add parallel\_scan [\#3577](https://github.com/kokkos/kokkos/pull/#3577)
- SYCL custom reductions [\#3544](https://github.com/kokkos/kokkos/pull/#3544)
- SYCL Enable container unit tests [\#3550](https://github.com/kokkos/kokkos/pull/#3550)
- SYCL feature level 5 [\#3480](https://github.com/kokkos/kokkos/pull/#3480)
- SYCL Feature level 4 (parallel\_for) [\#3474](https://github.com/kokkos/kokkos/pull/#3474)
- SYCL feature level 3 [\#3451](https://github.com/kokkos/kokkos/pull/#3451)
- SYCL feature level 2 [\#3447](https://github.com/kokkos/kokkos/pull/#3447)
- OpenMPTarget: Hierarchial reduction for + operator on scalars [\#3504](https://github.com/kokkos/kokkos/pull/#3504)
- OpenMPTarget hierarchical [\#3411](https://github.com/kokkos/kokkos/pull/#3411)
- HIP Add Impl::atomic\_[store,load] [\#3440](https://github.com/kokkos/kokkos/pull/#3440)
- HIP enable global lock arrays [\#3418](https://github.com/kokkos/kokkos/pull/#3418)
- HIP Implement multiple occupancy paths for various HIP kernel launchers [\#3366](https://github.com/kokkos/kokkos/pull/#3366)
- Add a64fx and fujitsu Compiler support [\#3614](https://github.com/kokkos/kokkos/pull/3614)
- Adding support for AMD gfx908 archictecture [\#3375](https://github.com/kokkos/kokkos/pull/3375)
- SYCL parallel\_for MDRangePolicy [\#3583](https://github.com/kokkos/kokkos/pull/3583)
- SYCL add parallel\_scan [\#3577](https://github.com/kokkos/kokkos/pull/3577)
- SYCL custom reductions [\#3544](https://github.com/kokkos/kokkos/pull/3544)
- SYCL Enable container unit tests [\#3550](https://github.com/kokkos/kokkos/pull/3550)
- SYCL feature level 5 [\#3480](https://github.com/kokkos/kokkos/pull/3480)
- SYCL Feature level 4 (parallel\_for) [\#3474](https://github.com/kokkos/kokkos/pull/3474)
- SYCL feature level 3 [\#3451](https://github.com/kokkos/kokkos/pull/3451)
- SYCL feature level 2 [\#3447](https://github.com/kokkos/kokkos/pull/3447)
- OpenMPTarget: Hierarchial reduction for + operator on scalars [\#3504](https://github.com/kokkos/kokkos/pull/3504)
- OpenMPTarget hierarchical [\#3411](https://github.com/kokkos/kokkos/pull/3411)
- HIP Add Impl::atomic\_[store,load] [\#3440](https://github.com/kokkos/kokkos/pull/3440)
- HIP enable global lock arrays [\#3418](https://github.com/kokkos/kokkos/pull/3418)
- HIP Implement multiple occupancy paths for various HIP kernel launchers [\#3366](https://github.com/kokkos/kokkos/pull/3366)
**Implemented enhancements Policies:**
- MDRangePolicy: Let it be semiregular [\#3494](https://github.com/kokkos/kokkos/pull/#3494)
- MDRangePolicy: Check narrowing conversion in construction [\#3527](https://github.com/kokkos/kokkos/pull/#3527)
- MDRangePolicy: CombinedReducers support [\#3395](https://github.com/kokkos/kokkos/pull/#3395)
- Kokkos Graph: Interface and Default Implementation [\#3362](https://github.com/kokkos/kokkos/pull/#3362)
- Kokkos Graph: add Cuda Graph implementation [\#3369](https://github.com/kokkos/kokkos/pull/#3369)
- TeamPolicy: implemented autotuning of team sizes and vector lengths [\#3206](https://github.com/kokkos/kokkos/pull/#3206)
- RangePolicy: Initialize all data members in default constructor [\#3509](https://github.com/kokkos/kokkos/pull/#3509)
- MDRangePolicy: Let it be semiregular [\#3494](https://github.com/kokkos/kokkos/pull/3494)
- MDRangePolicy: Check narrowing conversion in construction [\#3527](https://github.com/kokkos/kokkos/pull/3527)
- MDRangePolicy: CombinedReducers support [\#3395](https://github.com/kokkos/kokkos/pull/3395)
- Kokkos Graph: Interface and Default Implementation [\#3362](https://github.com/kokkos/kokkos/pull/3362)
- Kokkos Graph: add Cuda Graph implementation [\#3369](https://github.com/kokkos/kokkos/pull/3369)
- TeamPolicy: implemented autotuning of team sizes and vector lengths [\#3206](https://github.com/kokkos/kokkos/pull/3206)
- RangePolicy: Initialize all data members in default constructor [\#3509](https://github.com/kokkos/kokkos/pull/3509)
**Implemented enhancements BuildSystem:**
- Auto-generate core test files for all backends [\#3488](https://github.com/kokkos/kokkos/pull/#3488)
- Avoid rewriting test files when calling cmake [\#3548](https://github.com/kokkos/kokkos/pull/#3548)
- RULE\_LAUNCH\_COMPILE and RULE\_LAUNCH\_LINK system for nvcc\_wrapper [\#3136](https://github.com/kokkos/kokkos/pull/#3136)
- Adding -include as a known argument to nvcc\_wrapper [\#3434](https://github.com/kokkos/kokkos/pull/#3434)
- Install hpcbind script [\#3402](https://github.com/kokkos/kokkos/pull/#3402)
- cmake/kokkos\_tribits.cmake: add parsing for args [\#3457](https://github.com/kokkos/kokkos/pull/#3457)
- Auto-generate core test files for all backends [\#3488](https://github.com/kokkos/kokkos/pull/3488)
- Avoid rewriting test files when calling cmake [\#3548](https://github.com/kokkos/kokkos/pull/3548)
- RULE\_LAUNCH\_COMPILE and RULE\_LAUNCH\_LINK system for nvcc\_wrapper [\#3136](https://github.com/kokkos/kokkos/pull/3136)
- Adding -include as a known argument to nvcc\_wrapper [\#3434](https://github.com/kokkos/kokkos/pull/3434)
- Install hpcbind script [\#3402](https://github.com/kokkos/kokkos/pull/3402)
- cmake/kokkos\_tribits.cmake: add parsing for args [\#3457](https://github.com/kokkos/kokkos/pull/3457)
**Implemented enhancements Tools:**
- Changed namespacing of Kokkos::Tools::Impl::Impl::tune\_policy [\#3455](https://github.com/kokkos/kokkos/pull/#3455)
- Delegate to an impl allocate/deallocate method to allow specifying a SpaceHandle for MemorySpaces [\#3530](https://github.com/kokkos/kokkos/pull/#3530)
- Use the Kokkos Profiling interface rather than the Impl interface [\#3518](https://github.com/kokkos/kokkos/pull/#3518)
- Runtime option for tuning [\#3459](https://github.com/kokkos/kokkos/pull/#3459)
- Dual View Tool Events [\#3326](https://github.com/kokkos/kokkos/pull/#3326)
- Changed namespacing of Kokkos::Tools::Impl::Impl::tune\_policy [\#3455](https://github.com/kokkos/kokkos/pull/3455)
- Delegate to an impl allocate/deallocate method to allow specifying a SpaceHandle for MemorySpaces [\#3530](https://github.com/kokkos/kokkos/pull/3530)
- Use the Kokkos Profiling interface rather than the Impl interface [\#3518](https://github.com/kokkos/kokkos/pull/3518)
- Runtime option for tuning [\#3459](https://github.com/kokkos/kokkos/pull/3459)
- Dual View Tool Events [\#3326](https://github.com/kokkos/kokkos/pull/3326)
**Implemented enhancements Other:**
- Abort on errors instead of just printing [\#3528](https://github.com/kokkos/kokkos/pull/#3528)
- Enable C++14 macros unconditionally [\#3449](https://github.com/kokkos/kokkos/pull/#3449)
- Make ViewMapping trivially copyable [\#3436](https://github.com/kokkos/kokkos/pull/#3436)
- Rename struct ViewMapping to class [\#3435](https://github.com/kokkos/kokkos/pull/#3435)
- Replace enums in Kokkos\_ViewMapping.hpp (removes -Wextra) [\#3422](https://github.com/kokkos/kokkos/pull/#3422)
- Use bool for enums representing bools [\#3416](https://github.com/kokkos/kokkos/pull/#3416)
- Fence active instead of default execution space instances [\#3388](https://github.com/kokkos/kokkos/pull/#3388)
- Refactor parallel\_reduce fence usage [\#3359](https://github.com/kokkos/kokkos/pull/#3359)
- Moved Space EBO helpers to Kokkos\_EBO [\#3357](https://github.com/kokkos/kokkos/pull/#3357)
- Add remove\_cvref type trait [\#3340](https://github.com/kokkos/kokkos/pull/#3340)
- Adding identity type traits and update definition of identity\_t alias [\#3339](https://github.com/kokkos/kokkos/pull/#3339)
- Add is\_specialization\_of type trait [\#3338](https://github.com/kokkos/kokkos/pull/#3338)
- Make ScratchMemorySpace semi-regular [\#3309](https://github.com/kokkos/kokkos/pull/#3309)
- Optimize min/max atomics with early exit on no-op case [\#3265](https://github.com/kokkos/kokkos/pull/#3265)
- Refactor Backend Development [\#2941](https://github.com/kokkos/kokkos/pull/#2941)
- Abort on errors instead of just printing [\#3528](https://github.com/kokkos/kokkos/pull/3528)
- Enable C++14 macros unconditionally [\#3449](https://github.com/kokkos/kokkos/pull/3449)
- Make ViewMapping trivially copyable [\#3436](https://github.com/kokkos/kokkos/pull/3436)
- Rename struct ViewMapping to class [\#3435](https://github.com/kokkos/kokkos/pull/3435)
- Replace enums in Kokkos\_ViewMapping.hpp (removes -Wextra) [\#3422](https://github.com/kokkos/kokkos/pull/3422)
- Use bool for enums representing bools [\#3416](https://github.com/kokkos/kokkos/pull/3416)
- Fence active instead of default execution space instances [\#3388](https://github.com/kokkos/kokkos/pull/3388)
- Refactor parallel\_reduce fence usage [\#3359](https://github.com/kokkos/kokkos/pull/3359)
- Moved Space EBO helpers to Kokkos\_EBO [\#3357](https://github.com/kokkos/kokkos/pull/3357)
- Add remove\_cvref type trait [\#3340](https://github.com/kokkos/kokkos/pull/3340)
- Adding identity type traits and update definition of identity\_t alias [\#3339](https://github.com/kokkos/kokkos/pull/3339)
- Add is\_specialization\_of type trait [\#3338](https://github.com/kokkos/kokkos/pull/3338)
- Make ScratchMemorySpace semi-regular [\#3309](https://github.com/kokkos/kokkos/pull/3309)
- Optimize min/max atomics with early exit on no-op case [\#3265](https://github.com/kokkos/kokkos/pull/3265)
- Refactor Backend Development [\#2941](https://github.com/kokkos/kokkos/pull/2941)
**Fixed bugs:**
- Fixup MDRangePolicy construction from Kokkos arrays [\#3591](https://github.com/kokkos/kokkos/pull/#3591)
- Add atomic functions for unsigned long long using gcc built-in [\#3588](https://github.com/kokkos/kokkos/pull/#3588)
- Fixup silent pointless comparison with zero in checked\_narrow\_cast (compiler workaround) [\#3566](https://github.com/kokkos/kokkos/pull/#3566)
- Fixes for ROCm 3.9 [\#3565](https://github.com/kokkos/kokkos/pull/#3565)
- Fix windows build issues which crept in for the CUDA build [\#3532](https://github.com/kokkos/kokkos/pull/#3532)
- HIP Fix atomics of large data types and clean up lock arrays [\#3529](https://github.com/kokkos/kokkos/pull/#3529)
- Pthreads fix exception resulting from 0 grain size [\#3510](https://github.com/kokkos/kokkos/pull/#3510)
- Fixup do not require atomic operation to be default constructible [\#3503](https://github.com/kokkos/kokkos/pull/#3503)
- Fix race condition in HIP backend [\#3467](https://github.com/kokkos/kokkos/pull/#3467)
- Replace KOKKOS\_DEBUG with KOKKOS\_ENABLE\_DEBUG [\#3458](https://github.com/kokkos/kokkos/pull/#3458)
- Fix multi-stream team scratch space definition for HIP [\#3398](https://github.com/kokkos/kokkos/pull/#3398)
- HIP fix template deduction [\#3393](https://github.com/kokkos/kokkos/pull/#3393)
- Fix compiling with HIP and C++17 [\#3390](https://github.com/kokkos/kokkos/pull/#3390)
- Fix sigFPE in HIP blocksize deduction [\#3378](https://github.com/kokkos/kokkos/pull/#3378)
- Type alias change: replace CS with CTS to avoid conflicts with NVSHMEM [\#3348](https://github.com/kokkos/kokkos/pull/#3348)
- Clang compilation of CUDA backend on Windows [\#3345](https://github.com/kokkos/kokkos/pull/#3345)
- Fix HBW support [\#3343](https://github.com/kokkos/kokkos/pull/#3343)
- Added missing fences to unique token [\#3260](https://github.com/kokkos/kokkos/pull/#3260)
- Fixup MDRangePolicy construction from Kokkos arrays [\#3591](https://github.com/kokkos/kokkos/pull/3591)
- Add atomic functions for unsigned long long using gcc built-in [\#3588](https://github.com/kokkos/kokkos/pull/3588)
- Fixup silent pointless comparison with zero in checked\_narrow\_cast (compiler workaround) [\#3566](https://github.com/kokkos/kokkos/pull/3566)
- Fixes for ROCm 3.9 [\#3565](https://github.com/kokkos/kokkos/pull/3565)
- Fix windows build issues which crept in for the CUDA build [\#3532](https://github.com/kokkos/kokkos/pull/3532)
- HIP Fix atomics of large data types and clean up lock arrays [\#3529](https://github.com/kokkos/kokkos/pull/3529)
- Pthreads fix exception resulting from 0 grain size [\#3510](https://github.com/kokkos/kokkos/pull/3510)
- Fixup do not require atomic operation to be default constructible [\#3503](https://github.com/kokkos/kokkos/pull/3503)
- Fix race condition in HIP backend [\#3467](https://github.com/kokkos/kokkos/pull/3467)
- Replace KOKKOS\_DEBUG with KOKKOS\_ENABLE\_DEBUG [\#3458](https://github.com/kokkos/kokkos/pull/3458)
- Fix multi-stream team scratch space definition for HIP [\#3398](https://github.com/kokkos/kokkos/pull/3398)
- HIP fix template deduction [\#3393](https://github.com/kokkos/kokkos/pull/3393)
- Fix compiling with HIP and C++17 [\#3390](https://github.com/kokkos/kokkos/pull/3390)
- Fix sigFPE in HIP blocksize deduction [\#3378](https://github.com/kokkos/kokkos/pull/3378)
- Type alias change: replace CS with CTS to avoid conflicts with NVSHMEM [\#3348](https://github.com/kokkos/kokkos/pull/3348)
- Clang compilation of CUDA backend on Windows [\#3345](https://github.com/kokkos/kokkos/pull/3345)
- Fix HBW support [\#3343](https://github.com/kokkos/kokkos/pull/3343)
- Added missing fences to unique token [\#3260](https://github.com/kokkos/kokkos/pull/3260)
**Incompatibilities:**
- Remove unused utilities (forward, move, and expand\_variadic) from Kokkos::Impl [\#3535](https://github.com/kokkos/kokkos/pull/#3535)
- Remove unused traits [\#3534](https://github.com/kokkos/kokkos/pull/#3534)
- HIP: Remove old HCC code [\#3301](https://github.com/kokkos/kokkos/pull/#3301)
- Prepare for deprecation of ViewAllocateWithoutInitializing [\#3264](https://github.com/kokkos/kokkos/pull/#3264)
- Remove ROCm backend [\#3148](https://github.com/kokkos/kokkos/pull/#3148)
- Remove unused utilities (forward, move, and expand\_variadic) from Kokkos::Impl [\#3535](https://github.com/kokkos/kokkos/pull/3535)
- Remove unused traits [\#3534](https://github.com/kokkos/kokkos/pull/3534)
- HIP: Remove old HCC code [\#3301](https://github.com/kokkos/kokkos/pull/3301)
- Prepare for deprecation of ViewAllocateWithoutInitializing [\#3264](https://github.com/kokkos/kokkos/pull/3264)
- Remove ROCm backend [\#3148](https://github.com/kokkos/kokkos/pull/3148)
## [3.2.01](https://github.com/kokkos/kokkos/tree/3.2.01) (2020-11-17)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.2.00...3.2.01)

View File

@ -150,8 +150,8 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 4)
set(Kokkos_VERSION_MINOR 2)
set(Kokkos_VERSION_PATCH 1)
set(Kokkos_VERSION_MINOR 3)
set(Kokkos_VERSION_PATCH 0)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
message(STATUS "Kokkos version: ${Kokkos_VERSION}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")

View File

@ -11,8 +11,8 @@ CXXFLAGS += $(SHFLAGS)
endif
KOKKOS_VERSION_MAJOR = 4
KOKKOS_VERSION_MINOR = 2
KOKKOS_VERSION_PATCH = 1
KOKKOS_VERSION_MINOR = 3
KOKKOS_VERSION_PATCH = 0
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
@ -22,14 +22,14 @@ KOKKOS_DEVICES ?= "OpenMP"
# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: GFX906,GFX908,GFX90A,GFX940,GFX942,GFX1030,GFX1100
# IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
KOKKOS_ARCH ?= ""
# Options: yes,no
KOKKOS_DEBUG ?= "no"
# Options: hwloc,librt,experimental_memkind
# Options: hwloc
KOKKOS_USE_TPLS ?= ""
# Options: c++17,c++1z,c++20,c++2a,c++23,c++2b
KOKKOS_CXX_STANDARD ?= "c++17"
@ -56,7 +56,7 @@ uppercase_internal=$(if $1,$$(subst $(firstword $1),$(call uppercase_internal,$(
uppercase=$(eval uppercase_RESULT:=$(call uppercase_internal,$(uppercase_TABLE),$1))$(uppercase_RESULT)
# Return a 1 if a string contains a substring and 0 if not
# Note the search string should be without '"'
# Example: $(call kokkos_has_string,"hwloc,librt",hwloc)
# Example: $(call kokkos_has_string,"hwloc,libdl",hwloc)
# Will return a 1
kokkos_has_string=$(if $(findstring $(call uppercase,$2),$(call uppercase,$1)),1,0)
# Returns 1 if the path exists, 0 otherwise
@ -73,11 +73,11 @@ KOKKOS_INTERNAL_ENABLE_CXX20 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),
KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a)
KOKKOS_INTERNAL_ENABLE_CXX23 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++23)
KOKKOS_INTERNAL_ENABLE_CXX2B := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2b)
KOKKOS_INTERNAL_ENABLE_CXX26 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++26)
KOKKOS_INTERNAL_ENABLE_CXX2C := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2c)
# Check for external libraries.
KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
KOKKOS_INTERNAL_USE_LIBRT := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),librt)
KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),experimental_memkind)
# Check for advanced settings.
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
@ -318,7 +318,6 @@ endif
# Intel based.
KOKKOS_INTERNAL_USE_ARCH_KNC := $(call kokkos_has_string,$(KOKKOS_ARCH),KNC)
KOKKOS_INTERNAL_USE_ARCH_WSM := $(call kokkos_has_string,$(KOKKOS_ARCH),WSM)
KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB)
KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW)
KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW)
@ -398,11 +397,9 @@ KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc))
# IBM based.
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(call kokkos_has_string,$(KOKKOS_ARCH),BGQ)
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power7)
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power9)
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
# AMD based.
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
@ -413,22 +410,37 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0)
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
endif
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100))
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
endif
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
# Incompatible flags?
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc)
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc)
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
@ -573,6 +585,16 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2B), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2B_FLAG)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX23")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX26), 1)
#I cannot make CMake add this in a good way - so add it here
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX26_FLAG)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2C), 1)
#I cannot make CMake add this in a good way - so add it here
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2C_FLAG)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
@ -612,27 +634,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HWLOC")
endif
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_LIBRT")
KOKKOS_LIBS += -lrt
KOKKOS_TPL_LIBRARY_NAMES += rt
endif
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
ifneq ($(KOKKOS_CMAKE), yes)
ifneq ($(MEMKIND_PATH),)
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
KOKKOS_LIBDIRS += -L$(MEMKIND_PATH)/lib
KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib
KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include
KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib
endif
KOKKOS_LIBS += -lmemkind -lnuma
KOKKOS_TPL_LIBRARY_NAMES += memkind numa
endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HBWSPACE")
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LARGE_MEM_TESTS")
endif
@ -699,10 +700,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC), 0)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC")
else
@ -827,20 +824,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_SSE42")
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xSSE4.2
KOKKOS_LDFLAGS += -xSSE4.2
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
else
# Assume that this is a really a GNU compiler.
KOKKOS_CXXFLAGS += -msse4.2
KOKKOS_LDFLAGS += -msse4.2
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX")
@ -1249,7 +1232,6 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp")
tmp := $(call kokkos_update_config_header, KOKKOS_POST_INCLUDE_HPP_, "KokkosCore_Config_PostInclude.tmp", "KokkosCore_Config_PostInclude.hpp")
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_CUDA.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_CUDA.hpp>","KokkosCore_Config_DeclareBackend.hpp")
@ -1289,10 +1271,6 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_SERIAL.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_SERIAL.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HBWSpace.hpp>","KokkosCore_Config_FwdBackend.hpp")
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HBWSpace.hpp>","KokkosCore_Config_DeclareBackend.hpp")
endif
endif
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
@ -1403,11 +1381,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
KOKKOS_TPL_LIBRARY_NAMES += hpx
endif
# Don't include Kokkos_HBWSpace.cpp if not using MEMKIND to avoid a link warning.
ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC))
endif
# With Cygwin functions such as fdopen and fileno are not defined
# when strict ansi is enabled. strict ansi gets enabled with -std=c++14
# though. So we hard undefine it here. Not sure if that has any bad side effects
@ -1461,6 +1434,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
else
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENMP */")
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_OPENACC")
else
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENACC */")
endif
tmp := $(call desul_append_header, "")
tmp := $(call desul_append_header, "$H""endif")
@ -1493,7 +1472,7 @@ include $(KOKKOS_PATH)/Makefile.targets
kokkos-clean:
rm -f $(KOKKOS_OBJ_LINK) $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a KokkosCore_Config_SetupBackend.hpp \
KokkosCore_Config_FwdBackend.hpp KokkosCore_Config_DeclareBackend.hpp KokkosCore_Config_DeclareBackend.tmp \
KokkosCore_Config_FwdBackend.tmp KokkosCore_Config_PostInclude.hpp KokkosCore_Config_PostInclude.tmp KokkosCore_Config_SetupBackend.tmp
KokkosCore_Config_FwdBackend.tmp KokkosCore_Config_SetupBackend.tmp
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
ar cr libkokkos.a $(KOKKOS_OBJ_LINK)

View File

@ -20,8 +20,6 @@ Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Ta
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
Kokkos_Profiling.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
@ -30,8 +28,6 @@ Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
Kokkos_MemorySpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp
@ -82,8 +78,10 @@ Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array
endif
ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Spinwait.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -123,6 +121,3 @@ Kokkos_OpenACC_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC
Kokkos_OpenACC_SharedAllocationRecord.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp
endif
Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp

View File

@ -28,7 +28,7 @@ To start learning about Kokkos:
- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
For questions find us on Slack: https://kokkosteam.slack.com or open a github issue.
For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue.
For non-public questions send an email to: *crtrott(at)sandia.gov*
@ -48,10 +48,10 @@ Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citati
# License
[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)
[![License](https://img.shields.io/badge/License-Apache--2.0_WITH_LLVM--exception-blue)](https://spdx.org/licenses/LLVM-exception.html)
Under the terms of Contract DE-NA0003525 with NTESS,
the U.S. Government retains certain rights in this software.
The full license statement used in all headers is available [here](https://kokkos.github.io/kokkos-core-wiki/license.html) or
[here](https://github.com/kokkos/kokkos/blob/master/LICENSE).
The full license statement used in all headers is available [here](https://kokkos.org/kokkos-core-wiki/license.html) or
[here](https://github.com/kokkos/kokkos/blob/develop/LICENSE).

12
lib/kokkos/SECURITY.md Normal file
View File

@ -0,0 +1,12 @@
# Reporting Security Issues
To report a security issue, please email
[lebrungrandt@ornl.gov](mailto:lebrungrandt@ornl.gov)
and [crtrott@sandia.gov](mailto:crtrott@sandia.gov)
with a description of the issue, the steps you took to create the issue,
affected versions, and, if known, mitigations for the issue.
Our vulnerability management team will respond within 5 working days of your
email. If the issue is confirmed as a vulnerability, we will open a
Security Advisory and acknowledge your contributions as part of it. This project
follows a 90 day disclosure timeline.

View File

@ -159,7 +159,6 @@ If you don't specify a CUDA build variant in a `packages.yaml` and you build you
> spack install superscience
````
you may end up just getting the default Kokkos (i.e. Serial).
Some examples are included in the `config/yaml` folder for common platforms.
Before running `spack install <package>` we recommend running `spack spec <package>` to confirm your dependency tree is correct.
For example, with Kokkos Kernels:
````bash

View File

@ -30,5 +30,5 @@ KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms
${CMAKE_CURRENT_SOURCE_DIR}
)
KOKKOS_LINK_TPL(kokkoscontainers PUBLIC ROCTHRUST)
KOKKOS_LINK_TPL(kokkoscore PUBLIC ONEDPL)

View File

@ -849,18 +849,17 @@ class Random_XorShift64 {
return drand(end - start) + start;
}
// Marsaglia polar method for drawing a standard normal distributed random
// Box-muller method for drawing a standard normal distributed random
// number
KOKKOS_INLINE_FUNCTION
double normal() {
double S = 2.0;
double U;
while (S >= 1.0) {
U = 2.0 * drand() - 1.0;
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * std::sqrt(-2.0 * std::log(S) / S);
constexpr auto two_pi = 2 * Kokkos::numbers::pi_v<double>;
const double u = drand();
const double v = drand();
const double r = Kokkos::sqrt(-2.0 * Kokkos::log(u));
const double theta = v * two_pi;
return r * Kokkos::cos(theta);
}
KOKKOS_INLINE_FUNCTION
@ -1094,18 +1093,17 @@ class Random_XorShift1024 {
return drand(end - start) + start;
}
// Marsaglia polar method for drawing a standard normal distributed random
// Box-muller method for drawing a standard normal distributed random
// number
KOKKOS_INLINE_FUNCTION
double normal() {
double S = 2.0;
double U;
while (S >= 1.0) {
U = 2.0 * drand() - 1.0;
const double V = 2.0 * drand() - 1.0;
S = U * U + V * V;
}
return U * std::sqrt(-2.0 * std::log(S) / S);
constexpr auto two_pi = 2 * Kokkos::numbers::pi_v<double>;
const double u = drand();
const double v = drand();
const double r = Kokkos::sqrt(-2.0 * Kokkos::log(u));
const double theta = v * two_pi;
return r * Kokkos::cos(theta);
}
KOKKOS_INLINE_FUNCTION
@ -1545,13 +1543,23 @@ template <class ViewType, class RandomPool, class IndexType = int64_t>
void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type begin,
typename ViewType::const_value_type end) {
fill_random(typename ViewType::execution_space{}, a, g, begin, end);
Kokkos::fence(
"fill_random: fence before since no execution space instance provided");
typename ViewType::execution_space exec;
fill_random(exec, a, g, begin, end);
exec.fence(
"fill_random: fence after since no execution space instance provided");
}
template <class ViewType, class RandomPool, class IndexType = int64_t>
void fill_random(ViewType a, RandomPool g,
typename ViewType::const_value_type range) {
fill_random(typename ViewType::execution_space{}, a, g, 0, range);
Kokkos::fence(
"fill_random: fence before since no execution space instance provided");
typename ViewType::execution_space exec;
fill_random(exec, a, g, 0, range);
exec.fence(
"fill_random: fence after since no execution space instance provided");
}
} // namespace Kokkos

View File

@ -23,6 +23,7 @@
#include "sorting/Kokkos_BinSortPublicAPI.hpp"
#include "sorting/Kokkos_SortPublicAPI.hpp"
#include "sorting/Kokkos_SortByKeyPublicAPI.hpp"
#include "sorting/Kokkos_NestedSortPublicAPI.hpp"
#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT

View File

@ -35,7 +35,6 @@
// following the std classification.
// modifying ops
#include "std_algorithms/Kokkos_Swap.hpp"
#include "std_algorithms/Kokkos_IterSwap.hpp"
// non-modifying sequence

View File

@ -0,0 +1,117 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#ifndef KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_
#define KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_
#include "./impl/Kokkos_SortByKeyImpl.hpp"
#include <Kokkos_Core.hpp>
#include <algorithm>
namespace Kokkos::Experimental {
// ---------------------------------------------------------------
// basic overloads
// ---------------------------------------------------------------
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties>
void sort_by_key(
const ExecutionSpace& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
// constraints
using KeysType = Kokkos::View<KeysDataType, KeysProperties...>;
using ValuesType = Kokkos::View<ValuesDataType, ValuesProperties...>;
::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(keys);
::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(values);
static_assert(SpaceAccessibility<ExecutionSpace,
typename KeysType::memory_space>::accessible,
"Kokkos::sort: execution space instance is not able to access "
"the memory space of the keys View argument!");
static_assert(
SpaceAccessibility<ExecutionSpace,
typename ValuesType::memory_space>::accessible,
"Kokkos::sort: execution space instance is not able to access "
"the memory space of the values View argument!");
static_assert(KeysType::static_extent(0) == 0 ||
ValuesType::static_extent(0) == 0 ||
KeysType::static_extent(0) == ValuesType::static_extent(0));
if (values.size() != keys.size())
Kokkos::abort((std::string("values and keys extents must be the same. The "
"values extent is ") +
std::to_string(values.size()) + ", and the keys extent is " +
std::to_string(keys.size()) + ".")
.c_str());
if (keys.extent(0) <= 1) {
return;
}
::Kokkos::Impl::sort_by_key_device_view_without_comparator(exec, keys,
values);
}
// ---------------------------------------------------------------
// overloads supporting a custom comparator
// ---------------------------------------------------------------
template <class ExecutionSpace, class ComparatorType, class KeysDataType,
class... KeysProperties, class ValuesDataType,
class... ValuesProperties>
void sort_by_key(
const ExecutionSpace& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
const ComparatorType& comparator) {
// constraints
using KeysType = Kokkos::View<KeysDataType, KeysProperties...>;
using ValuesType = Kokkos::View<ValuesDataType, ValuesProperties...>;
::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(keys);
::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(values);
static_assert(SpaceAccessibility<ExecutionSpace,
typename KeysType::memory_space>::accessible,
"Kokkos::sort: execution space instance is not able to access "
"the memory space of the keys View argument!");
static_assert(
SpaceAccessibility<ExecutionSpace,
typename ValuesType::memory_space>::accessible,
"Kokkos::sort: execution space instance is not able to access "
"the memory space of the values View argument!");
static_assert(KeysType::static_extent(0) == 0 ||
ValuesType::static_extent(0) == 0 ||
KeysType::static_extent(0) == ValuesType::static_extent(0));
if (values.size() != keys.size())
Kokkos::abort((std::string("values and keys extents must be the same. The "
"values extent is ") +
std::to_string(values.size()) + ", and the keys extent is " +
std::to_string(keys.size()) + ".")
.c_str());
if (keys.extent(0) <= 1) {
return;
}
::Kokkos::Impl::sort_by_key_device_view_with_comparator(exec, keys, values,
comparator);
}
} // namespace Kokkos::Experimental
#endif

View File

@ -29,7 +29,7 @@ namespace Kokkos {
// ---------------------------------------------------------------
template <class ExecutionSpace, class DataType, class... Properties>
void sort([[maybe_unused]] const ExecutionSpace& exec,
void sort(const ExecutionSpace& exec,
const Kokkos::View<DataType, Properties...>& view) {
// constraints
using ViewType = Kokkos::View<DataType, Properties...>;
@ -52,6 +52,7 @@ void sort([[maybe_unused]] const ExecutionSpace& exec,
}
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
exec.fence("Kokkos::sort without comparator use std::sort");
auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view);
std::sort(first, last);
@ -82,7 +83,7 @@ void sort(const Kokkos::View<DataType, Properties...>& view) {
// ---------------------------------------------------------------
template <class ExecutionSpace, class ComparatorType, class DataType,
class... Properties>
void sort([[maybe_unused]] const ExecutionSpace& exec,
void sort(const ExecutionSpace& exec,
const Kokkos::View<DataType, Properties...>& view,
const ComparatorType& comparator) {
// constraints
@ -105,6 +106,7 @@ void sort([[maybe_unused]] const ExecutionSpace& exec,
}
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
exec.fence("Kokkos::sort with comparator use std::sort");
auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view);
std::sort(first, last, comparator);

View File

@ -18,7 +18,6 @@
#define KOKKOS_NESTED_SORT_IMPL_HPP_
#include <Kokkos_Core.hpp>
#include <std_algorithms/Kokkos_Swap.hpp>
namespace Kokkos {
namespace Experimental {
@ -99,7 +98,7 @@ KOKKOS_INLINE_FUNCTION void sort_nested_impl(
keyView(elem1) = key2;
keyView(elem2) = key1;
if constexpr (!std::is_same_v<ValueViewType, std::nullptr_t>) {
Kokkos::Experimental::swap(valueView(elem1), valueView(elem2));
Kokkos::kokkos_swap(valueView(elem1), valueView(elem2));
}
}
}

View File

@ -0,0 +1,401 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#ifndef KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_
#define KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_
#include <Kokkos_Core.hpp>
#if defined(KOKKOS_ENABLE_CUDA)
// Workaround for `Instruction 'shfl' without '.sync' is not supported on
// .target sm_70 and higher from PTX ISA version 6.4`.
// Also see https://github.com/NVIDIA/cub/pull/170.
#if !defined(CUB_USE_COOPERATIVE_GROUPS)
#define CUB_USE_COOPERATIVE_GROUPS
#endif
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#if defined(KOKKOS_COMPILER_CLANG)
// Some versions of Clang fail to compile Thrust, failing with errors like
// this:
// <snip>/thrust/system/cuda/detail/core/agent_launcher.h:557:11:
// error: use of undeclared identifier 'va_printf'
// The exact combination of versions for Clang and Thrust (or CUDA) for this
// failure was not investigated, however even very recent version combination
// (Clang 10.0.0 and Cuda 10.0) demonstrated failure.
//
// Defining _CubLog here locally allows us to avoid that code path, however
// disabling some debugging diagnostics
#pragma push_macro("_CubLog")
#ifdef _CubLog
#undef _CubLog
#endif
#define _CubLog
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#pragma pop_macro("_CubLog")
#else
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#endif
#pragma GCC diagnostic pop
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#endif
#if defined(KOKKOS_ENABLE_ONEDPL) && \
(ONEDPL_VERSION_MAJOR > 2022 || \
(ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2))
#define KOKKOS_ONEDPL_HAS_SORT_BY_KEY
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
#endif
namespace Kokkos::Impl {
template <typename T>
constexpr inline bool is_admissible_to_kokkos_sort_by_key =
::Kokkos::is_view<T>::value&& T::rank() == 1 &&
(std::is_same<typename T::traits::array_layout,
Kokkos::LayoutLeft>::value ||
std::is_same<typename T::traits::array_layout,
Kokkos::LayoutRight>::value ||
std::is_same<typename T::traits::array_layout,
Kokkos::LayoutStride>::value);
template <class ViewType>
KOKKOS_INLINE_FUNCTION constexpr void
static_assert_is_admissible_to_kokkos_sort_by_key(const ViewType& /* view */) {
static_assert(is_admissible_to_kokkos_sort_by_key<ViewType>,
"Kokkos::sort_by_key only accepts 1D values View with "
"LayoutRight, LayoutLeft or LayoutStride.");
}
// For the fallback implementation for sort_by_key using Kokkos::sort, we need
// to consider if Kokkos::sort defers to the fallback implementation that copies
// the array to the host and uses std::sort, see
// copy_to_host_run_stdsort_copy_back() in impl/Kokkos_SortImpl.hpp. If
// sort_on_device_v is true, we assume that std::sort doesn't copy data.
// Otherwise, we manually copy all data to the host and provide Kokkos::sort
// with a host execution space.
template <class ExecutionSpace, class Layout>
inline constexpr bool sort_on_device_v = false;
#if defined(KOKKOS_ENABLE_CUDA)
template <class Layout>
inline constexpr bool sort_on_device_v<Kokkos::Cuda, Layout> = true;
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties, class... MaybeComparator>
void sort_by_key_cudathrust(
const Kokkos::Cuda& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
MaybeComparator&&... maybeComparator) {
const auto policy = thrust::cuda::par.on(exec.cuda_stream());
auto keys_first = ::Kokkos::Experimental::begin(keys);
auto keys_last = ::Kokkos::Experimental::end(keys);
auto values_first = ::Kokkos::Experimental::begin(values);
thrust::sort_by_key(policy, keys_first, keys_last, values_first,
std::forward<MaybeComparator>(maybeComparator)...);
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class Layout>
inline constexpr bool sort_on_device_v<Kokkos::HIP, Layout> = true;
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties, class... MaybeComparator>
void sort_by_key_rocthrust(
const Kokkos::HIP& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
MaybeComparator&&... maybeComparator) {
const auto policy = thrust::hip::par.on(exec.hip_stream());
auto keys_first = ::Kokkos::Experimental::begin(keys);
auto keys_last = ::Kokkos::Experimental::end(keys);
auto values_first = ::Kokkos::Experimental::begin(values);
thrust::sort_by_key(policy, keys_first, keys_last, values_first,
std::forward<MaybeComparator>(maybeComparator)...);
}
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
template <class Layout>
inline constexpr bool sort_on_device_v<Kokkos::Experimental::SYCL, Layout> =
std::is_same_v<Layout, Kokkos::LayoutLeft> ||
std::is_same_v<Layout, Kokkos::LayoutRight>;
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties, class... MaybeComparator>
void sort_by_key_onedpl(
const Kokkos::Experimental::SYCL& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
MaybeComparator&&... maybeComparator) {
if (keys.stride(0) != 1 && values.stride(0) != 1) {
Kokkos::abort(
"SYCL sort_by_key only supports rank-1 Views with stride(0) = 1.");
}
// Can't use Experimental::begin/end here since the oneDPL then assumes that
// the data is on the host.
auto queue = exec.sycl_queue();
auto policy = oneapi::dpl::execution::make_device_policy(queue);
const int n = keys.extent(0);
oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(),
std::forward<MaybeComparator>(maybeComparator)...);
}
#endif
#endif
template <typename ExecutionSpace, typename PermutationView, typename ViewType>
void applyPermutation(const ExecutionSpace& space,
const PermutationView& permutation,
const ViewType& view) {
static_assert(std::is_integral<typename PermutationView::value_type>::value);
auto view_copy = Kokkos::create_mirror(
Kokkos::view_alloc(space, typename ExecutionSpace::memory_space{},
Kokkos::WithoutInitializing),
view);
Kokkos::deep_copy(space, view_copy, view);
Kokkos::parallel_for(
"Kokkos::sort_by_key_via_sort::permute_" + view.label(),
Kokkos::RangePolicy<ExecutionSpace>(space, 0, view.extent(0)),
KOKKOS_LAMBDA(int i) { view(i) = view_copy(permutation(i)); });
}
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties,
class... MaybeComparator>
void sort_by_key_via_sort(
const ExecutionSpace& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
MaybeComparator&&... maybeComparator) {
static_assert(sizeof...(MaybeComparator) <= 1);
auto const n = keys.size();
Kokkos::View<unsigned int*, ExecutionSpace> permute(
Kokkos::view_alloc(exec, Kokkos::WithoutInitializing,
"Kokkos::sort_by_key_via_sort::permute"),
n);
// iota
Kokkos::parallel_for(
"Kokkos::sort_by_key_via_sort::iota",
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
KOKKOS_LAMBDA(int i) { permute(i) = i; });
using Layout =
typename Kokkos::View<unsigned int*, ExecutionSpace>::array_layout;
if constexpr (!sort_on_device_v<ExecutionSpace, Layout>) {
auto host_keys = Kokkos::create_mirror_view(
Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing),
keys);
auto host_permute = Kokkos::create_mirror_view(
Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing),
permute);
Kokkos::deep_copy(exec, host_keys, keys);
Kokkos::deep_copy(exec, host_permute, permute);
exec.fence("Kokkos::Impl::sort_by_key_via_sort: before host sort");
Kokkos::DefaultHostExecutionSpace host_exec;
if constexpr (sizeof...(MaybeComparator) == 0) {
Kokkos::sort(
host_exec, host_permute,
KOKKOS_LAMBDA(int i, int j) { return host_keys(i) < host_keys(j); });
} else {
auto keys_comparator =
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
Kokkos::sort(
host_exec, host_permute, KOKKOS_LAMBDA(int i, int j) {
return keys_comparator(host_keys(i), host_keys(j));
});
}
host_exec.fence("Kokkos::Impl::sort_by_key_via_sort: after host sort");
Kokkos::deep_copy(exec, permute, host_permute);
} else {
#ifdef KOKKOS_ENABLE_SYCL
auto* raw_keys_in_comparator = keys.data();
auto stride = keys.stride(0);
if constexpr (sizeof...(MaybeComparator) == 0) {
Kokkos::sort(
exec, permute, KOKKOS_LAMBDA(int i, int j) {
return raw_keys_in_comparator[i * stride] <
raw_keys_in_comparator[j * stride];
});
} else {
auto keys_comparator =
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
Kokkos::sort(
exec, permute, KOKKOS_LAMBDA(int i, int j) {
return keys_comparator(raw_keys_in_comparator[i * stride],
raw_keys_in_comparator[j * stride]);
});
}
#else
if constexpr (sizeof...(MaybeComparator) == 0) {
Kokkos::sort(
exec, permute,
KOKKOS_LAMBDA(int i, int j) { return keys(i) < keys(j); });
} else {
auto keys_comparator =
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
Kokkos::sort(
exec, permute, KOKKOS_LAMBDA(int i, int j) {
return keys_comparator(keys(i), keys(j));
});
}
#endif
}
applyPermutation(exec, permute, keys);
applyPermutation(exec, permute, values);
}
// ------------------------------------------------------
//
// specialize cases for sorting by key without comparator
//
// ------------------------------------------------------
#if defined(KOKKOS_ENABLE_CUDA)
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties>
void sort_by_key_device_view_without_comparator(
const Kokkos::Cuda& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
sort_by_key_cudathrust(exec, keys, values);
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties>
void sort_by_key_device_view_without_comparator(
const Kokkos::HIP& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
sort_by_key_rocthrust(exec, keys, values);
}
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
class... ValuesProperties>
void sort_by_key_device_view_without_comparator(
const Kokkos::Experimental::SYCL& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
if (keys.stride(0) == 1 && values.stride(0) == 1)
sort_by_key_onedpl(exec, keys, values);
else
#endif
sort_by_key_via_sort(exec, keys, values);
}
#endif
// fallback case
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties>
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value>
sort_by_key_device_view_without_comparator(
const ExecutionSpace& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
sort_by_key_via_sort(exec, keys, values);
}
// ---------------------------------------------------
//
// specialize cases for sorting by key with comparator
//
// ---------------------------------------------------
#if defined(KOKKOS_ENABLE_CUDA)
template <class ComparatorType, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties>
void sort_by_key_device_view_with_comparator(
const Kokkos::Cuda& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
const ComparatorType& comparator) {
sort_by_key_cudathrust(exec, keys, values, comparator);
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class ComparatorType, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties>
void sort_by_key_device_view_with_comparator(
const Kokkos::HIP& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
const ComparatorType& comparator) {
sort_by_key_rocthrust(exec, keys, values, comparator);
}
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
template <class ComparatorType, class KeysDataType, class... KeysProperties,
class ValuesDataType, class... ValuesProperties>
void sort_by_key_device_view_with_comparator(
const Kokkos::Experimental::SYCL& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
const ComparatorType& comparator) {
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
if (keys.stride(0) == 1 && values.stride(0) == 1)
sort_by_key_onedpl(exec, keys, values, comparator);
else
#endif
sort_by_key_via_sort(exec, keys, values, comparator);
}
#endif
// fallback case
template <class ComparatorType, class ExecutionSpace, class KeysDataType,
class... KeysProperties, class ValuesDataType,
class... ValuesProperties>
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value>
sort_by_key_device_view_with_comparator(
const ExecutionSpace& exec,
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
const ComparatorType& comparator) {
sort_by_key_via_sort(exec, keys, values, comparator);
}
#undef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
} // namespace Kokkos::Impl
#endif

View File

@ -63,6 +63,11 @@
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>
@ -184,6 +189,26 @@ void sort_cudathrust(const Cuda& space,
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class DataType, class... Properties, class... MaybeComparator>
void sort_rocthrust(const HIP& space,
const Kokkos::View<DataType, Properties...>& view,
MaybeComparator&&... maybeComparator) {
using ViewType = Kokkos::View<DataType, Properties...>;
static_assert(ViewType::rank == 1,
"Kokkos::sort: currently only supports rank-1 Views.");
if (view.extent(0) <= 1) {
return;
}
const auto exec = thrust::hip::par.on(space.hip_stream());
auto first = ::Kokkos::Experimental::begin(view);
auto last = ::Kokkos::Experimental::end(view);
thrust::sort(exec, first, last,
std::forward<MaybeComparator>(maybeComparator)...);
}
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
template <class DataType, class... Properties, class... MaybeComparator>
void sort_onedpl(const Kokkos::Experimental::SYCL& space,
@ -274,6 +299,14 @@ void sort_device_view_without_comparator(
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class DataType, class... Properties>
void sort_device_view_without_comparator(
const HIP& exec, const Kokkos::View<DataType, Properties...>& view) {
sort_rocthrust(exec, view);
}
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
template <class DataType, class... Properties>
void sort_device_view_without_comparator(
@ -320,6 +353,15 @@ void sort_device_view_with_comparator(
}
#endif
#if defined(KOKKOS_ENABLE_ROCTHRUST)
template <class ComparatorType, class DataType, class... Properties>
void sort_device_view_with_comparator(
const HIP& exec, const Kokkos::View<DataType, Properties...>& view,
const ComparatorType& comparator) {
sort_rocthrust(exec, view, comparator);
}
#endif
#if defined(KOKKOS_ENABLE_ONEDPL)
template <class ComparatorType, class DataType, class... Properties>
void sort_device_view_with_comparator(

View File

@ -50,7 +50,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -66,7 +66,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -93,7 +93,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto copy(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -50,7 +50,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy_backward(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -65,7 +65,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy_backward(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -92,7 +92,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto copy_backward(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -54,7 +54,8 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy_if(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
const ::Kokkos::View<DataType2, Properties2...>& dest,
Predicate pred) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -69,7 +70,8 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy_if(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
const ::Kokkos::View<DataType2, Properties2...>& dest,
Predicate pred) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -96,7 +98,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto copy_if(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
const ::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -51,7 +51,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy_n(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -66,7 +66,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto copy_n(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -93,7 +93,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto copy_n(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -80,7 +80,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
bool equal(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2) {
const ::Kokkos::View<DataType2, Properties2...>& view2) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -96,7 +96,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
bool equal(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2) {
const ::Kokkos::View<DataType2, Properties2...>& view2) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -111,7 +111,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
bool equal(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2,
const ::Kokkos::View<DataType2, Properties2...>& view2,
BinaryPredicateType predicate) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -128,7 +128,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
bool equal(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2,
const ::Kokkos::View<DataType2, Properties2...>& view2,
BinaryPredicateType predicate) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -227,7 +227,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION bool equal(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2) {
const ::Kokkos::View<DataType2, Properties2...>& view2) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -243,7 +243,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION bool equal(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2,
const ::Kokkos::View<DataType2, Properties2...>& view2,
BinaryPredicateType predicate) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);

View File

@ -19,7 +19,6 @@
#include <Kokkos_Core.hpp>
#include "impl/Kokkos_Constraints.hpp"
#include "Kokkos_Swap.hpp"
namespace Kokkos {
namespace Experimental {
@ -33,7 +32,7 @@ struct StdIterSwapFunctor {
KOKKOS_FUNCTION
void operator()(int i) const {
(void)i;
::Kokkos::Experimental::swap(*m_a, *m_b);
::Kokkos::kokkos_swap(*m_a, *m_b);
}
KOKKOS_FUNCTION
@ -58,6 +57,16 @@ void iter_swap(IteratorType1 a, IteratorType2 b) {
Impl::iter_swap_impl(a, b);
}
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
template <class T>
KOKKOS_DEPRECATED_WITH_COMMENT("Use Kokkos::kokkos_swap instead!")
KOKKOS_FUNCTION
void swap(T& a, T& b) noexcept(::Kokkos::kokkos_swap(std::declval<T&>(),
std::declval<T&>())) {
::Kokkos::kokkos_swap(a, b);
}
#endif
} // namespace Experimental
} // namespace Kokkos

View File

@ -54,7 +54,7 @@ template <
bool lexicographical_compare(
const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2) {
const ::Kokkos::View<DataType2, Properties2...>& view2) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -71,7 +71,7 @@ template <
bool lexicographical_compare(
const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2) {
const ::Kokkos::View<DataType2, Properties2...>& view2) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -112,7 +112,8 @@ template <
bool lexicographical_compare(
const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
const ::Kokkos::View<DataType2, Properties2...>& view2,
ComparatorType comp) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -129,7 +130,8 @@ template <
bool lexicographical_compare(
const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
const ::Kokkos::View<DataType2, Properties2...>& view2,
ComparatorType comp) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -161,7 +163,7 @@ template <class TeamHandleType, class DataType1, class... Properties1,
KOKKOS_FUNCTION bool lexicographical_compare(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2) {
const ::Kokkos::View<DataType2, Properties2...>& view2) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
@ -187,7 +189,8 @@ template <class TeamHandleType, class DataType1, class... Properties1,
KOKKOS_FUNCTION bool lexicographical_compare(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& view1,
::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
const ::Kokkos::View<DataType2, Properties2...>& view2,
ComparatorType comp) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);

View File

@ -50,7 +50,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto move(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -64,7 +64,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto move(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -92,7 +92,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto move(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -41,7 +41,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto move_backward(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -65,7 +65,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto move_backward(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -94,7 +94,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto move_backward(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -50,7 +50,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto reverse_copy(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -65,7 +65,7 @@ template <
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
auto reverse_copy(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -94,7 +94,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto reverse_copy(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -40,7 +40,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
auto swap_ranges(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -64,7 +64,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
auto swap_ranges(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -94,7 +94,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto swap_ranges(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest) {
const ::Kokkos::View<DataType2, Properties2...>& dest) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);

View File

@ -58,7 +58,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
auto transform(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest,
const ::Kokkos::View<DataType2, Properties2...>& dest,
UnaryOperation unary_op) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -73,7 +73,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
auto transform(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest,
const ::Kokkos::View<DataType2, Properties2...>& dest,
UnaryOperation unary_op) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -119,7 +119,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
auto transform(const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source1,
const ::Kokkos::View<DataType2, Properties2...>& source2,
::Kokkos::View<DataType3, Properties3...>& dest,
const ::Kokkos::View<DataType3, Properties3...>& dest,
BinaryOperation binary_op) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);
@ -137,7 +137,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
auto transform(const std::string& label, const ExecutionSpace& ex,
const ::Kokkos::View<DataType1, Properties1...>& source1,
const ::Kokkos::View<DataType2, Properties2...>& source2,
::Kokkos::View<DataType3, Properties3...>& dest,
const ::Kokkos::View<DataType3, Properties3...>& dest,
BinaryOperation binary_op) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);
@ -174,7 +174,8 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
KOKKOS_FUNCTION auto transform(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source,
::Kokkos::View<DataType2, Properties2...>& dest, UnaryOperation unary_op) {
const ::Kokkos::View<DataType2, Properties2...>& dest,
UnaryOperation unary_op) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
@ -207,7 +208,7 @@ KOKKOS_FUNCTION auto transform(
const TeamHandleType& teamHandle,
const ::Kokkos::View<DataType1, Properties1...>& source1,
const ::Kokkos::View<DataType2, Properties2...>& source2,
::Kokkos::View<DataType3, Properties3...>& dest,
const ::Kokkos::View<DataType3, Properties3...>& dest,
BinaryOperation binary_op) {
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);

View File

@ -47,8 +47,9 @@ struct ExclusiveScanDefaultFunctorForKnownNeutralElement {
KOKKOS_FUNCTION
void operator()(const IndexType i, ValueType& update,
const bool final_pass) const {
const auto tmp = m_first_from[i];
if (final_pass) m_first_dest[i] = update + m_init_value;
update += m_first_from[i];
update += tmp;
}
};
@ -73,6 +74,7 @@ struct ExclusiveScanDefaultFunctorWithValueWrapper {
KOKKOS_FUNCTION
void operator()(const IndexType i, value_type& update,
const bool final_pass) const {
const auto tmp = value_type{m_first_from[i], false};
if (final_pass) {
if (i == 0) {
m_first_dest[i] = m_init_value;
@ -81,7 +83,6 @@ struct ExclusiveScanDefaultFunctorWithValueWrapper {
}
}
const auto tmp = value_type{m_first_from[i], false};
this->join(update, tmp);
}
@ -132,6 +133,7 @@ struct TransformExclusiveScanFunctorWithValueWrapper {
KOKKOS_FUNCTION
void operator()(const IndexType i, value_type& update,
const bool final_pass) const {
const auto tmp = value_type{m_unary_op(m_first_from[i]), false};
if (final_pass) {
if (i == 0) {
// for both ExclusiveScan and TransformExclusiveScan,
@ -142,7 +144,6 @@ struct TransformExclusiveScanFunctorWithValueWrapper {
}
}
const auto tmp = value_type{m_unary_op(m_first_from[i]), false};
this->join(update, tmp);
}
@ -190,6 +191,7 @@ struct TransformExclusiveScanFunctorWithoutValueWrapper {
KOKKOS_FUNCTION
void operator()(const IndexType i, ValueType& update,
const bool final_pass) const {
const auto tmp = ValueType{m_unary_op(m_first_from[i])};
if (final_pass) {
if (i == 0) {
// for both ExclusiveScan and TransformExclusiveScan,
@ -200,7 +202,6 @@ struct TransformExclusiveScanFunctorWithoutValueWrapper {
}
}
const auto tmp = ValueType{m_unary_op(m_first_from[i])};
this->join(update, tmp);
}

View File

@ -46,15 +46,14 @@ struct StdRemoveIfStage1Functor {
void operator()(const IndexType i, IndexType& update,
const bool final_pass) const {
auto& myval = m_first_from[i];
if (final_pass) {
if (!m_must_remove(myval)) {
if (!m_must_remove(myval)) {
if (final_pass) {
// calling move here is ok because we are inside final pass
// we are calling move assign as specified by the std
m_first_dest[update] = std::move(myval);
}
}
if (!m_must_remove(myval)) {
update += 1;
}
}
@ -108,7 +107,9 @@ IteratorType remove_if_exespace_impl(const std::string& label,
// create helper tmp view
using value_type = typename IteratorType::value_type;
using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
tmp_view_type tmp_view("std_remove_if_tmp_view", keep_count);
tmp_view_type tmp_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, ex,
"std_remove_if_tmp_view"),
keep_count);
using tmp_readwrite_iterator_type = decltype(begin(tmp_view));
// in stage 1, *move* all elements to keep from original range to tmp

View File

@ -21,7 +21,6 @@
#include "Kokkos_Constraints.hpp"
#include "Kokkos_HelperPredicates.hpp"
#include <std_algorithms/Kokkos_Distance.hpp>
#include <std_algorithms/Kokkos_Swap.hpp>
#include <string>
namespace Kokkos {
@ -39,7 +38,7 @@ struct StdReverseFunctor {
KOKKOS_FUNCTION
void operator()(index_type i) const {
::Kokkos::Experimental::swap(m_first[i], m_last[-i - 1]);
::Kokkos::kokkos_swap(m_first[i], m_last[-i - 1]);
}
KOKKOS_FUNCTION

View File

@ -126,10 +126,11 @@ KOKKOS_FUNCTION IteratorType shift_left_team_impl(
// execution space impl because for this team impl we are
// within a parallel region, so for now we solve serially
const std::size_t numElementsToMove =
using difference_type = typename IteratorType::difference_type;
const difference_type numElementsToMove =
::Kokkos::Experimental::distance(first + n, last);
Kokkos::single(Kokkos::PerTeam(teamHandle), [=]() {
for (std::size_t i = 0; i < numElementsToMove; ++i) {
for (difference_type i = 0; i < numElementsToMove; ++i) {
first[i] = std::move(first[i + n]);
}
});

View File

@ -103,26 +103,6 @@ IteratorType shift_right_exespace_impl(
return first + n;
}
template <class Iterator>
struct StdShiftRightTeamSingleFunctor {
Iterator m_first;
Iterator m_last;
std::size_t m_shift;
KOKKOS_FUNCTION
void operator()() const {
// the impl function calling this functor guarantees that
// - m_shift is non-negative
// - m_first, m_last identify a valid range with m_last > m_first
// - m_shift is less than m_last - m_first
// so I can safely use std::size_t here
}
KOKKOS_FUNCTION
StdShiftRightTeamSingleFunctor(Iterator _first, Iterator _last, std::size_t n)
: m_first(std::move(_first)), m_last(std::move(_last)), m_shift(n) {}
};
template <class TeamHandleType, class IteratorType>
KOKKOS_FUNCTION IteratorType shift_right_team_impl(
const TeamHandleType& teamHandle, IteratorType first, IteratorType last,
@ -145,10 +125,11 @@ KOKKOS_FUNCTION IteratorType shift_right_team_impl(
// execution space impl because for this team impl we are
// within a parallel region, so for now we solve serially
const std::size_t numElementsToMove =
using difference_type = typename IteratorType::difference_type;
const difference_type numElementsToMove =
::Kokkos::Experimental::distance(first, last - n);
Kokkos::single(Kokkos::PerTeam(teamHandle), [=]() {
for (std::size_t i = 0; i < numElementsToMove; ++i) {
for (difference_type i = 0; i < numElementsToMove; ++i) {
last[-i - 1] = std::move(last[-n - i - 1]);
}
});

View File

@ -21,7 +21,6 @@
#include "Kokkos_Constraints.hpp"
#include "Kokkos_HelperPredicates.hpp"
#include <std_algorithms/Kokkos_Distance.hpp>
#include <std_algorithms/Kokkos_Swap.hpp>
#include <string>
namespace Kokkos {
@ -36,7 +35,7 @@ struct StdSwapRangesFunctor {
KOKKOS_FUNCTION
void operator()(index_type i) const {
::Kokkos::Experimental::swap(m_first1[i], m_first2[i]);
::Kokkos::kokkos_swap(m_first1[i], m_first2[i]);
}
KOKKOS_FUNCTION

View File

@ -105,7 +105,9 @@ IteratorType unique_exespace_impl(const std::string& label,
// using the same algorithm used for unique_copy but we now move things
using value_type = typename IteratorType::value_type;
using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
tmp_view_type tmp_view("std_unique_tmp_view", num_elements_to_explore);
tmp_view_type tmp_view(Kokkos::view_alloc(ex, Kokkos::WithoutInitializing,
"std_unique_tmp_view"),
num_elements_to_explore);
// scan extent is: num_elements_to_explore - 1
// for same reason as the one explained in unique_copy

View File

@ -25,6 +25,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
set(ALGO_SORT_SOURCES)
foreach(SOURCE_Input
TestSort
TestSortByKey
TestSortCustomComp
TestBinSortA
TestBinSortB
@ -57,35 +58,37 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
configure_file(${dir}/dummy.cpp ${file})
list(APPEND ALGO_RANDOM_SOURCES ${file})
endforeach()
endif()
endforeach()
# ------------------------------------------
# std set A
# ------------------------------------------
set(STDALGO_SOURCES_A)
foreach(Name
# ------------------------------------------
# std set A
# ------------------------------------------
set(STDALGO_SOURCES_A)
foreach(Name
StdReducers
StdAlgorithmsConstraints
RandomAccessIterator
)
list(APPEND STDALGO_SOURCES_A Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_SOURCES_A Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std set B
# ------------------------------------------
set(STDALGO_SOURCES_B)
foreach(Name
# ------------------------------------------
# std set B
# ------------------------------------------
set(STDALGO_SOURCES_B)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsMinMaxElementOps
)
list(APPEND STDALGO_SOURCES_B Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_SOURCES_B Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std set C
# ------------------------------------------
set(STDALGO_SOURCES_C)
foreach(Name
# ------------------------------------------
# std set C
# ------------------------------------------
set(STDALGO_SOURCES_C)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsLexicographicalCompare
StdAlgorithmsForEach
@ -100,15 +103,15 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsSearch_n
StdAlgorithmsMismatch
StdAlgorithmsMoveBackward
)
list(APPEND STDALGO_SOURCES_C Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_SOURCES_C Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std set D
# ------------------------------------------
set(STDALGO_SOURCES_D)
foreach(Name
# ------------------------------------------
# std set D
# ------------------------------------------
set(STDALGO_SOURCES_D)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsModOps
StdAlgorithmsModSeqOps
@ -128,15 +131,15 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsReverse
StdAlgorithmsShiftLeft
StdAlgorithmsShiftRight
)
list(APPEND STDALGO_SOURCES_D Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_SOURCES_D Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std set E
# ------------------------------------------
set(STDALGO_SOURCES_E)
foreach(Name
# ------------------------------------------
# std set E
# ------------------------------------------
set(STDALGO_SOURCES_E)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsIsSorted
StdAlgorithmsIsSortedUntil
@ -149,83 +152,83 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsTransformUnaryOp
StdAlgorithmsTransformExclusiveScan
StdAlgorithmsTransformInclusiveScan
)
list(APPEND STDALGO_SOURCES_E Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_SOURCES_E Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team Q
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_Q)
foreach(Name
# ------------------------------------------
# std team Q
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_Q)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamInclusiveScan
StdAlgorithmsTeamTransformInclusiveScan
)
list(APPEND STDALGO_TEAM_SOURCES_Q Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_Q Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team P
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_P)
foreach(Name
# ------------------------------------------
# std team P
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_P)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamExclusiveScan
StdAlgorithmsTeamTransformExclusiveScan
)
list(APPEND STDALGO_TEAM_SOURCES_P Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_P Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team M
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_M)
foreach(Name
# ------------------------------------------
# std team M
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_M)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamTransformUnaryOp
StdAlgorithmsTeamTransformBinaryOp
StdAlgorithmsTeamGenerate
StdAlgorithmsTeamGenerate_n
StdAlgorithmsTeamSwapRanges
)
list(APPEND STDALGO_TEAM_SOURCES_M Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_M Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team L
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_L)
foreach(Name
# ------------------------------------------
# std team L
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_L)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamIsSorted
StdAlgorithmsTeamIsSortedUntil
StdAlgorithmsTeamIsPartitioned
StdAlgorithmsTeamPartitionCopy
StdAlgorithmsTeamPartitionPoint
)
list(APPEND STDALGO_TEAM_SOURCES_L Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_L Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team I
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_I)
foreach(Name
# ------------------------------------------
# std team I
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_I)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamUnique
StdAlgorithmsTeamAdjacentDifference
StdAlgorithmsTeamReduce
StdAlgorithmsTeamTransformReduce
)
list(APPEND STDALGO_TEAM_SOURCES_I Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_I Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team H
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_H)
foreach(Name
# ------------------------------------------
# std team H
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_H)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamCopy
StdAlgorithmsTeamCopy_n
@ -236,43 +239,43 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsTeamRemoveIf
StdAlgorithmsTeamRemoveCopy
StdAlgorithmsTeamRemoveCopyIf
)
list(APPEND STDALGO_TEAM_SOURCES_H Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_H Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team G
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_G)
foreach(Name
# ------------------------------------------
# std team G
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_G)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamMove
StdAlgorithmsTeamMoveBackward
StdAlgorithmsTeamShiftLeft
StdAlgorithmsTeamShiftRight
)
list(APPEND STDALGO_TEAM_SOURCES_G Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_G Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team F
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_F)
foreach(Name
# ------------------------------------------
# std team F
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_F)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamReverse
StdAlgorithmsTeamReverseCopy
StdAlgorithmsTeamRotate
StdAlgorithmsTeamRotateCopy
)
list(APPEND STDALGO_TEAM_SOURCES_F Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_F Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team E
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_E)
foreach(Name
# ------------------------------------------
# std team E
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_E)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamFill
StdAlgorithmsTeamFill_n
@ -280,28 +283,28 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsTeamReplaceIf
StdAlgorithmsTeamReplaceCopy
StdAlgorithmsTeamReplaceCopyIf
)
list(APPEND STDALGO_TEAM_SOURCES_E Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_E Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team D
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_D)
foreach(Name
# ------------------------------------------
# std team D
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_D)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamMinElement
StdAlgorithmsTeamMaxElement
StdAlgorithmsTeamMinMaxElement
)
list(APPEND STDALGO_TEAM_SOURCES_D Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_D Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team C
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_C)
foreach(Name
# ------------------------------------------
# std team C
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_C)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamFind
StdAlgorithmsTeamFindIf
@ -310,29 +313,29 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsTeamAnyOf
StdAlgorithmsTeamNoneOf
StdAlgorithmsTeamSearchN
)
list(APPEND STDALGO_TEAM_SOURCES_C Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_C Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team B
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_B)
foreach(Name
# ------------------------------------------
# std team B
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_B)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamEqual
StdAlgorithmsTeamSearch
StdAlgorithmsTeamFindEnd
StdAlgorithmsTeamFindFirstOf
)
list(APPEND STDALGO_TEAM_SOURCES_B Test${Name}.cpp)
endforeach()
)
list(APPEND STDALGO_TEAM_SOURCES_B Test${Name}.cpp)
endforeach()
# ------------------------------------------
# std team A
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_A)
foreach(Name
# ------------------------------------------
# std team A
# ------------------------------------------
set(STDALGO_TEAM_SOURCES_A)
foreach(Name
StdAlgorithmsCommon
StdAlgorithmsTeamAdjacentFind
StdAlgorithmsTeamCount
@ -341,11 +344,8 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
StdAlgorithmsTeamForEachN
StdAlgorithmsTeamLexicographicalCompare
StdAlgorithmsTeamMismatch
)
list(APPEND STDALGO_TEAM_SOURCES_A Test${Name}.cpp)
endforeach()
endif()
)
list(APPEND STDALGO_TEAM_SOURCES_A Test${Name}.cpp)
endforeach()
# FIXME_OPENMPTARGET - remove sort test as it leads to ICE with clang/16 and above at compile time.

View File

@ -27,13 +27,13 @@ TARGETS =
tmp := $(foreach device, $(KOKKOS_DEVICELIST), \
$(if $(filter Test$(device).cpp, $(shell ls Test$(device).cpp 2>/dev/null)),,\
$(shell echo "\#include <Test"${device}"_Category.hpp>" > Test$(device).cpp); \
$(shell echo "\#include <TestRandom.hpp>" >> Test$(device).cpp); \
$(shell echo "\#include <TestSort.hpp>" >> Test$(device).cpp); \
$(shell echo "\#include <TestBinSortA.hpp>" >> Test$(device).cpp); \
$(shell echo "\#include <TestBinSortB.hpp>" >> Test$(device).cpp); \
$(shell echo "\#include <TestNestedSort.hpp>" >> Test$(device).cpp); \
$(shell echo "\#include <TestSortCustomComp.hpp>" >> Test$(device).cpp); \
$(shell echo "$(H)include <Test"${device}"_Category.hpp>" > Test$(device).cpp); \
$(shell echo "$(H)include <TestRandom.hpp>" >> Test$(device).cpp); \
$(shell echo "$(H)include <TestSort.hpp>" >> Test$(device).cpp); \
$(shell echo "$(H)include <TestBinSortA.hpp>" >> Test$(device).cpp); \
$(shell echo "$(H)include <TestBinSortB.hpp>" >> Test$(device).cpp); \
$(shell echo "$(H)include <TestNestedSort.hpp>" >> Test$(device).cpp); \
$(shell echo "$(H)include <TestSortCustomComp.hpp>" >> Test$(device).cpp); \
) \
)

View File

@ -0,0 +1,241 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_BY_KEY_HPP
#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_BY_KEY_HPP
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <Kokkos_Sort.hpp>
#include <utility> // pair
namespace Test {
namespace SortImpl {
struct Less {
template <class ValueType>
KOKKOS_INLINE_FUNCTION bool operator()(const ValueType &lhs,
const ValueType &rhs) const {
return lhs < rhs;
}
};
struct Greater {
template <class ValueType>
KOKKOS_INLINE_FUNCTION bool operator()(const ValueType &lhs,
const ValueType &rhs) const {
return lhs > rhs;
}
};
template <class ExecutionSpace, class Keys, class Permute,
class Comparator = Less>
struct is_sorted_by_key_struct {
Keys keys;
Keys keys_orig;
Permute permute;
Comparator comparator;
is_sorted_by_key_struct(Keys keys_, Keys keys_orig_, Permute permute_,
Comparator comparator_ = Comparator{})
: keys(keys_),
keys_orig(keys_orig_),
permute(permute_),
comparator(comparator_) {}
KOKKOS_INLINE_FUNCTION
void operator()(int i, unsigned int &count) const {
if (i < keys.extent_int(0) - 1 && comparator(keys(i + 1), keys(i))) ++count;
if (keys(i) != keys_orig(permute(i))) ++count;
}
};
template <typename ExecutionSpace, typename ViewType>
void iota(ExecutionSpace const &space, ViewType const &v,
typename ViewType::value_type value = 0) {
using ValueType = typename ViewType::value_type;
Kokkos::parallel_for(
"ArborX::Algorithms::iota",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, v.extent(0)),
KOKKOS_LAMBDA(int i) { v(i) = value + (ValueType)i; });
}
} // namespace SortImpl
TEST(TEST_CATEGORY, SortByKeyEmptyView) {
using ExecutionSpace = TEST_EXECSPACE;
// does not matter if we use int or something else
Kokkos::View<int *, ExecutionSpace> keys("keys", 0);
Kokkos::View<float *, ExecutionSpace> values("values", 0);
ASSERT_NO_THROW(
Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values));
}
TEST(TEST_CATEGORY, SortByKey) {
using ExecutionSpace = TEST_EXECSPACE;
using MemorySpace = typename ExecutionSpace::memory_space;
ExecutionSpace space{};
for (auto keys_vector : {std::vector<int>{36, 19, 25, 17, 3, 7, 1, 2, 9},
std::vector<int>{36, 19, 25, 17, 3, 9, 1, 2, 7},
std::vector<int>{100, 19, 36, 17, 3, 25, 1, 2, 7},
std::vector<int>{15, 5, 11, 3, 4, 8}}) {
auto const n = keys_vector.size();
auto keys = Kokkos::create_mirror_view_and_copy(
MemorySpace{},
Kokkos::View<int *, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(
keys_vector.data(), n));
auto keys_orig = Kokkos::create_mirror(space, keys);
Kokkos::deep_copy(space, keys_orig, keys);
Kokkos::View<int *, ExecutionSpace> permute("permute", n);
SortImpl::iota(space, permute);
Kokkos::Experimental::sort_by_key(space, keys, permute);
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
SortImpl::is_sorted_by_key_struct<ExecutionSpace, decltype(keys),
decltype(permute)>(keys, keys_orig,
permute),
sort_fails);
ASSERT_EQ(sort_fails, 0u);
}
}
TEST(TEST_CATEGORY, SortByKeyWithComparator) {
using ExecutionSpace = TEST_EXECSPACE;
using MemorySpace = typename ExecutionSpace::memory_space;
ExecutionSpace space{};
SortImpl::Greater comparator;
for (auto keys_vector : {std::vector<int>{36, 19, 25, 17, 3, 7, 1, 2, 9},
std::vector<int>{36, 19, 25, 17, 3, 9, 1, 2, 7},
std::vector<int>{100, 19, 36, 17, 3, 25, 1, 2, 7},
std::vector<int>{15, 5, 11, 3, 4, 8}}) {
auto const n = keys_vector.size();
auto keys = Kokkos::create_mirror_view_and_copy(
MemorySpace{},
Kokkos::View<int *, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>(
keys_vector.data(), n));
auto keys_orig = Kokkos::create_mirror(space, keys);
Kokkos::deep_copy(space, keys_orig, keys);
Kokkos::View<int *, ExecutionSpace> permute("permute", n);
SortImpl::iota(space, permute);
Kokkos::Experimental::sort_by_key(space, keys, permute, comparator);
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
SortImpl::is_sorted_by_key_struct<ExecutionSpace, decltype(keys),
decltype(permute), SortImpl::Greater>(
keys, keys_orig, permute, comparator),
sort_fails);
ASSERT_EQ(sort_fails, 0u);
}
}
TEST(TEST_CATEGORY, SortByKeyStaticExtents) {
using ExecutionSpace = TEST_EXECSPACE;
ExecutionSpace space{};
Kokkos::View<int[10], ExecutionSpace> keys("keys");
Kokkos::View<int[10], ExecutionSpace> values_static("values_static");
ASSERT_NO_THROW(
Kokkos::Experimental::sort_by_key(space, keys, values_static));
Kokkos::View<int *, ExecutionSpace> values_dynamic("values_dynamic", 10);
ASSERT_NO_THROW(
Kokkos::Experimental::sort_by_key(space, keys, values_dynamic));
}
template <typename ExecutionSpace, typename Keys, typename Values>
void buildViewsForStrided(ExecutionSpace const &space, int n, Keys &keys,
Values &values) {
Kokkos::parallel_for(
"create_data",
Kokkos::MDRangePolicy<Kokkos::Rank<3>, ExecutionSpace>(space, {0, 0, 0},
{n, n, n}),
KOKKOS_LAMBDA(int i, int j, int k) {
keys(i, j, k) = n - i;
values(i, j, k) = j;
});
}
TEST(TEST_CATEGORY, SortByKeyWithStrides) {
using ExecutionSpace = TEST_EXECSPACE;
ExecutionSpace space{};
auto const n = 10;
Kokkos::View<int ***, ExecutionSpace> keys("keys", n, n, n);
Kokkos::View<int ***, ExecutionSpace> values("values", n, n, n);
buildViewsForStrided(space, n, keys, values);
auto keys_sub = Kokkos::subview(keys, Kokkos::ALL(), 1, 2);
auto values_sub = Kokkos::subview(values, 4, Kokkos::ALL(), 6);
auto keys_orig = Kokkos::create_mirror(space, keys_sub);
Kokkos::deep_copy(space, keys_orig, keys_sub);
Kokkos::Experimental::sort_by_key(space, keys_sub, values_sub);
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
SortImpl::is_sorted_by_key_struct<ExecutionSpace, decltype(keys_sub),
decltype(values_sub)>(
keys_sub, keys_orig, values_sub),
sort_fails);
ASSERT_EQ(sort_fails, 0u);
}
TEST(TEST_CATEGORY, SortByKeyKeysLargerThanValues) {
using ExecutionSpace = TEST_EXECSPACE;
// does not matter if we use int or something else
Kokkos::View<int *, ExecutionSpace> keys("keys", 3);
Kokkos::View<float *, ExecutionSpace> values("values", 1);
ASSERT_DEATH(
Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values),
"values and keys extents must be the same");
ASSERT_DEATH(Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values,
SortImpl::Greater{}),
"values and keys extents must be the same");
}
} // namespace Test
#endif

View File

@ -239,16 +239,8 @@ KOKKOS_FUNCTION bool team_members_have_matching_result(
// set accum to 1 if a mismach is found
const bool mismatch = memberValue != target;
int accum = static_cast<int>(mismatch);
// FIXME_OPENMPTARGET: team API does not meet the TeamHandle concept and
// ignores the reducer passed
#if defined KOKKOS_ENABLE_OPENMPTARGET
Kokkos::Sum<int> dummyReducer(accum);
const auto result = teamHandle.team_reduce(accum, dummyReducer);
return (result == 0);
#else
teamHandle.team_reduce(Kokkos::Sum<int>(accum));
return (accum == 0);
#endif
}
template <class ValueType1, class ValueType2>

View File

@ -16,6 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp>
#include <utility>
#include <iomanip>
namespace Test {
namespace stdalgos {
@ -132,47 +133,6 @@ void my_host_exclusive_scan(it1 first, it1 last, it2 dest, ValType init,
}
}
template <class ViewType1, class ViewType2, class ValueType, class BinaryOp>
void verify_data(ViewType1 data_view, // contains data
ViewType2 test_view, // the view to test
ValueType init_value, BinaryOp bop) {
//! always careful because views might not be deep copyable
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
auto data_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
using gold_view_value_type = typename ViewType2::value_type;
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
"goldh", data_view.extent(0));
my_host_exclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
KE::begin(gold_h), init_value, bop);
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
auto test_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
if (test_view_h.extent(0) > 0) {
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
// << gold_h(i) << " " << test_view_h(i) << " "
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
if (std::is_same<gold_view_value_type, int>::value) {
ASSERT_EQ(gold_h(i), test_view_h(i));
} else {
const auto error =
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
if (error > 1e-10) {
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
<< " " << gold_h(i) << " " << test_view_h(i) << " "
<< std::abs(static_cast<double>(gold_h(i) - test_view_h(i)))
<< std::endl;
}
EXPECT_LT(error, 1e-10);
}
}
}
}
template <class ValueType>
struct MultiplyFunctor {
KOKKOS_INLINE_FUNCTION
@ -189,107 +149,153 @@ struct SumFunctor {
}
};
struct VerifyData {
template <class ViewType1, class ViewType2, class ValueType, class BinaryOp>
void operator()(ViewType1 data_view, // contains data
ViewType2 test_view, // the view to test
ValueType init_value, BinaryOp bop) {
//! always careful because views might not be deep copyable
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
auto data_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
using gold_view_value_type = typename ViewType2::value_type;
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
"goldh", data_view.extent(0));
my_host_exclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
KE::begin(gold_h), init_value, bop);
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
auto test_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
if (test_view_h.extent(0) > 0) {
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
if (std::is_same<gold_view_value_type, int>::value) {
ASSERT_EQ(gold_h(i), test_view_h(i));
} else {
const auto error =
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
<< static_cast<double>(test_view_h(i)) << " "
<< static_cast<double>(gold_h(i));
}
}
}
}
template <class ViewType1, class ViewType2, class ValueType>
void operator()(ViewType1 data_view, // contains data
ViewType2 test_view, // the view to test
ValueType init_value) {
(*this)(data_view, test_view, init_value, SumFunctor<ValueType>());
}
};
std::string value_type_to_string(int) { return "int"; }
std::string value_type_to_string(double) { return "double"; }
template <class Tag, class ValueType, class InfoType>
void run_single_scenario_default_op(const InfoType& scenario_info,
ValueType init_value) {
using default_op = SumFunctor<ValueType>;
template <class Tag, class ValueType, class InfoType, class... OpOrEmpty>
void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
OpOrEmpty... empty_or_op) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// std::cout << "exclusive_scan default op: " << name << ", "
// << view_tag_to_string(Tag{}) << ", "
// << value_type_to_string(ValueType()) << ", "
// << "init = " << init_value << std::endl;
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
auto view_from = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
fill_view(view_from, name);
// view_dest is filled with zeros before calling the algorithm everytime to
// ensure the algorithm does something meaningful
{
fill_zero(view_dest);
auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest),
init_value);
init_value, empty_or_op...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, default_op());
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
}
{
fill_zero(view_dest);
auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest),
init_value);
init_value, empty_or_op...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, default_op());
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
}
{
fill_zero(view_dest);
auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value);
auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value,
empty_or_op...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, default_op());
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
}
{
fill_zero(view_dest);
auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest,
init_value);
init_value, empty_or_op...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, default_op());
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
}
Kokkos::fence();
}
template <class Tag, class ValueType, class InfoType, class BinaryOp>
void run_single_scenario_custom_op(const InfoType& scenario_info,
ValueType init_value, BinaryOp bop) {
template <class Tag, class ValueType, class InfoType, class... OpOrEmpty>
void run_single_scenario_inplace(const InfoType& scenario_info,
ValueType init_value,
OpOrEmpty... empty_or_op) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// std::cout << "exclusive_scan custom op: " << name << ", "
// << view_tag_to_string(Tag{}) << ", "
// << value_type_to_string(ValueType()) << ", "
// << "init = " << init_value << std::endl;
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
auto view_from = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
fill_view(view_from, name);
// since here we call the in-place operation, we need to use two views:
// view1: filled according to what the scenario asks for and is not modified
// view2: filled according to what the scenario asks for and used for the
// in-place op Therefore, after the op is done, view2 should contain the
// result of doing exclusive scan NOTE: view2 is filled below every time
// because the algorithm acts in place
auto view1 =
create_view<ValueType>(Tag{}, view_ext, "exclusive_scan_inplace_view1");
fill_view(view1, name);
auto view2 =
create_view<ValueType>(Tag{}, view_ext, "exclusive_scan_inplace_view2");
{
fill_zero(view_dest);
auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest),
init_value, bop);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, bop);
fill_view(view2, name);
auto r = KE::exclusive_scan(exespace(), KE::cbegin(view2), KE::cend(view2),
KE::begin(view2), init_value, empty_or_op...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, init_value, empty_or_op...);
}
{
fill_zero(view_dest);
auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest),
init_value, bop);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, bop);
fill_view(view2, name);
auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view2),
KE::cend(view2), KE::begin(view2), init_value,
empty_or_op...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, init_value, empty_or_op...);
}
{
fill_zero(view_dest);
auto r =
KE::exclusive_scan(exespace(), view_from, view_dest, init_value, bop);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, bop);
fill_view(view2, name);
auto r = KE::exclusive_scan(exespace(), view2, view2, init_value,
empty_or_op...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, init_value, empty_or_op...);
}
{
fill_zero(view_dest);
auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest,
init_value, bop);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, init_value, bop);
fill_view(view2, name);
auto r = KE::exclusive_scan("label", exespace(), view2, view2, init_value,
empty_or_op...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, init_value, empty_or_op...);
}
Kokkos::fence();
@ -303,34 +309,39 @@ void run_exclusive_scan_all_scenarios() {
{"medium", 1103}, {"large", 10513}};
for (const auto& it : scenarios) {
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{0});
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{1});
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{-2});
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{3});
run_single_scenario<Tag, ValueType>(it, ValueType{0});
run_single_scenario<Tag, ValueType>(it, ValueType{1});
run_single_scenario<Tag, ValueType>(it, ValueType{-2});
run_single_scenario<Tag, ValueType>(it, ValueType{3});
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0});
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2});
#if !defined KOKKOS_ENABLE_OPENMPTARGET
// custom multiply op is only run for small views otherwise it overflows
if (it.first == "small-a" || it.first == "small-b") {
using custom_bop_t = MultiplyFunctor<ValueType>;
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{0},
custom_bop_t());
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{1},
custom_bop_t());
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{-2},
custom_bop_t());
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{3},
custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{0}, custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{1}, custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{-2}, custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{3}, custom_bop_t());
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0},
custom_bop_t());
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2},
custom_bop_t());
}
using custom_bop_t = SumFunctor<ValueType>;
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{0},
custom_bop_t());
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{1},
custom_bop_t());
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{-2},
custom_bop_t());
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{3},
custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{0}, custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{1}, custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{-2}, custom_bop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{3}, custom_bop_t());
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0},
custom_bop_t());
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2},
custom_bop_t());
#endif
}
}

View File

@ -16,6 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp>
#include <utility>
#include <iomanip>
namespace Test {
namespace stdalgos {
@ -143,51 +144,6 @@ void my_host_inclusive_scan(it1 first, it1 last, it2 dest, BinOp bop,
}
}
template <class ViewType1, class ViewType2, class BinaryOp, class... Args>
void verify_data(ViewType1 data_view, // contains data
ViewType2 test_view, // the view to test
BinaryOp bop, Args... args /* copy on purpose */) {
//! always careful because views might not be deep copyable
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
auto data_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
using gold_view_value_type = typename ViewType2::value_type;
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
"goldh", data_view.extent(0));
my_host_inclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
KE::begin(gold_h), bop, args...);
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
auto test_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
const auto ext = test_view_h.extent(0);
if (ext > 0) {
for (std::size_t i = 0; i < ext; ++i) {
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
// << gold_h(i) << " " << test_view_h(i) << " "
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
if (std::is_same<gold_view_value_type, int>::value) {
ASSERT_EQ(gold_h(i), test_view_h(i));
} else {
const auto error =
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
if (error > 1e-10) {
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
<< " " << gold_h(i) << " " << test_view_h(i) << " "
<< std::abs(static_cast<double>(gold_h(i) - test_view_h(i)))
<< std::endl;
}
EXPECT_LT(error, 1e-10);
}
}
// std::cout << " last el: " << test_view_h(ext-1) << std::endl;
}
}
template <class ValueType>
struct MultiplyFunctor {
KOKKOS_INLINE_FUNCTION
@ -204,107 +160,151 @@ struct SumFunctor {
}
};
struct VerifyData {
template <class ViewType1, class ViewType2, class BinaryOp, class... Args>
void operator()(ViewType1 data_view, // contains data
ViewType2 test_view, // the view to test
BinaryOp bop, Args... args /* copy on purpose */) {
//! always careful because views might not be deep copyable
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
auto data_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
using gold_view_value_type = typename ViewType2::value_type;
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
"goldh", data_view.extent(0));
my_host_inclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
KE::begin(gold_h), bop, args...);
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
auto test_view_h =
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
const auto ext = test_view_h.extent(0);
if (ext > 0) {
for (std::size_t i = 0; i < ext; ++i) {
if (std::is_same<gold_view_value_type, int>::value) {
ASSERT_EQ(gold_h(i), test_view_h(i));
} else {
const auto error =
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
<< static_cast<double>(test_view_h(i)) << " "
<< static_cast<double>(gold_h(i));
}
}
}
}
template <class ViewType1, class ViewType2>
void operator()(ViewType1 data_view, // contains data
ViewType2 test_view) // the view to test
{
using value_type = typename ViewType1::non_const_value_type;
(*this)(data_view, test_view, SumFunctor<value_type>());
}
};
std::string value_type_to_string(int) { return "int"; }
std::string value_type_to_string(double) { return "double"; }
template <class Tag, class ValueType, class InfoType>
void run_single_scenario_default_op(const InfoType& scenario_info) {
using default_op = SumFunctor<ValueType>;
template <class Tag, class ValueType, class InfoType, class... Args>
void run_single_scenario(const InfoType& scenario_info,
Args... args /* copy on purpose */) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// std::cout << "inclusive_scan default op: " << name << ", "
// << view_tag_to_string(Tag{}) << ", "
// << value_type_to_string(ValueType()) << std::endl;
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
auto view_from = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
fill_view(view_from, name);
// view_dest is filled with zeros before calling the algorithm everytime to
// ensure the algorithm does something meaningful
{
fill_zero(view_dest);
auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest));
auto r =
KE::inclusive_scan(exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest), args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, default_op());
VerifyData()(view_from, view_dest, args...);
}
{
fill_zero(view_dest);
auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest));
auto r =
KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest), args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, default_op());
VerifyData()(view_from, view_dest, args...);
}
{
fill_zero(view_dest);
auto r = KE::inclusive_scan(exespace(), view_from, view_dest);
auto r = KE::inclusive_scan(exespace(), view_from, view_dest, args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, default_op());
VerifyData()(view_from, view_dest, args...);
}
{
fill_zero(view_dest);
auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest);
auto r =
KE::inclusive_scan("label", exespace(), view_from, view_dest, args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, default_op());
VerifyData()(view_from, view_dest, args...);
}
Kokkos::fence();
}
template <class Tag, class ValueType, class InfoType, class BinaryOp,
class... Args>
void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop,
Args... args /* copy on purpose */) {
template <class Tag, class ValueType, class InfoType, class... Args>
void run_single_scenario_inplace(const InfoType& scenario_info,
Args... args /* copy on purpose */) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// if (1 == sizeof...(Args)) {
// std::cout << "inclusive_scan custom op and init value: " << name << ", "
// << view_tag_to_string(Tag{}) << ", "
// << value_type_to_string(ValueType()) << ", " << std::endl;
// } else {
// std::cout << "inclusive_scan custom op: " << name << ", "
// << view_tag_to_string(Tag{}) << ", "
// << value_type_to_string(ValueType()) << ", " << std::endl;
// }
// since here we call the in-place operation, we need to use two views:
// view1: filled according to what the scenario asks for and is not modified
// view2: filled according to what the scenario asks for and used for the
// in-place op Therefore, after the op is done, view_2 should contain the
// result of doing exclusive scan NOTE: view2 is filled below every time
// because the algorithm acts in place
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
auto view_from = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
fill_view(view_from, name);
auto view1 =
create_view<ValueType>(Tag{}, view_ext, "inclusive_scan_inplace_view1");
fill_view(view1, name);
auto view2 =
create_view<ValueType>(Tag{}, view_ext, "inclusive_scan_inplace_view2");
{
fill_zero(view_dest);
auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest), bop,
args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, bop, args...);
fill_view(view2, name);
auto r = KE::inclusive_scan(exespace(), KE::cbegin(view2), KE::cend(view2),
KE::begin(view2), args...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, args...);
}
{
fill_zero(view_dest);
auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
KE::cend(view_from), KE::begin(view_dest), bop,
args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, bop, args...);
fill_view(view2, name);
auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view2),
KE::cend(view2), KE::begin(view2), args...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, args...);
}
{
fill_zero(view_dest);
auto r = KE::inclusive_scan(exespace(), view_from, view_dest, bop, args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, bop, args...);
fill_view(view2, name);
auto r = KE::inclusive_scan(exespace(), view2, view2, args...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, args...);
}
{
fill_zero(view_dest);
auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest, bop,
args...);
ASSERT_EQ(r, KE::end(view_dest));
verify_data(view_from, view_dest, bop, args...);
fill_view(view2, name);
auto r = KE::inclusive_scan("label", exespace(), view2, view2, args...);
ASSERT_EQ(r, KE::end(view2));
VerifyData()(view1, view2, args...);
}
Kokkos::fence();
@ -318,27 +318,35 @@ void run_inclusive_scan_all_scenarios() {
{"medium-a", 313}, {"medium-b", 1103}, {"large", 10513}};
for (const auto& it : scenarios) {
run_single_scenario_default_op<Tag, ValueType>(it);
run_single_scenario<Tag, ValueType>(it);
run_single_scenario_inplace<Tag, ValueType>(it);
#if !defined KOKKOS_ENABLE_OPENMPTARGET
// the sum custom op is always run
using sum_binary_op = SumFunctor<ValueType>;
sum_binary_op sbop;
run_single_scenario_custom_op<Tag, ValueType>(it, sbop);
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{0});
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{1});
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{-2});
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{3});
run_single_scenario<Tag, ValueType>(it, sbop);
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{0});
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{1});
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{-2});
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{3});
run_single_scenario_inplace<Tag, ValueType>(it, sbop, ValueType{0});
run_single_scenario_inplace<Tag, ValueType>(it, sbop, ValueType{-2});
// custom multiply only for small views to avoid overflows
if (it.first == "small-a" || it.first == "small-b") {
using mult_binary_op = MultiplyFunctor<ValueType>;
mult_binary_op mbop;
run_single_scenario_custom_op<Tag, ValueType>(it, mbop);
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{0});
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{1});
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{-2});
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{3});
run_single_scenario<Tag, ValueType>(it, mbop);
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{0});
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{1});
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{-2});
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{3});
run_single_scenario_inplace<Tag, ValueType>(it, mbop);
run_single_scenario_inplace<Tag, ValueType>(it, mbop, ValueType{0});
run_single_scenario_inplace<Tag, ValueType>(it, mbop, ValueType{-2});
}
#endif
}

View File

@ -146,7 +146,7 @@ void run_single_scenario(const InfoType& scenario_info) {
resultsA[3] = KE::is_sorted("label", exespace(), view);
const auto allA = std::all_of(resultsA.cbegin(), resultsA.cend(),
[=](bool v) { return v == gold; });
EXPECT_TRUE(allA);
EXPECT_TRUE(allA) << name << ", " << view_tag_to_string(Tag{});
#if !defined KOKKOS_ENABLE_OPENMPTARGET
CustomLessThanComparator<ValueType, ValueType> comp;
@ -159,7 +159,7 @@ void run_single_scenario(const InfoType& scenario_info) {
resultsB[3] = KE::is_sorted("label", exespace(), view, comp);
const auto allB = std::all_of(resultsB.cbegin(), resultsB.cend(),
[=](bool v) { return v == gold; });
EXPECT_TRUE(allB);
EXPECT_TRUE(allB) << name << ", " << view_tag_to_string(Tag{});
#endif
Kokkos::fence();
@ -173,9 +173,6 @@ void run_is_sorted_all_scenarios() {
{"medium-a", 1003}, {"medium-b", 1003}, {"large-a", 101513},
{"large-b", 101513}};
std::cout << "is_sorted: " << view_tag_to_string(Tag{})
<< ", all overloads \n";
for (const auto& it : scenarios) {
run_single_scenario<Tag, ValueType>(it);
}

View File

@ -145,10 +145,10 @@ void run_single_scenario(const InfoType& scenario_info) {
KE::is_sorted_until("label", exespace(), KE::begin(view), KE::end(view));
auto r3 = KE::is_sorted_until(exespace(), view);
auto r4 = KE::is_sorted_until("label", exespace(), view);
ASSERT_EQ(r1, gold);
ASSERT_EQ(r2, gold);
ASSERT_EQ(r3, gold);
ASSERT_EQ(r4, gold);
ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{});
ASSERT_EQ(r2, gold) << name << ", " << view_tag_to_string(Tag{});
ASSERT_EQ(r3, gold) << name << ", " << view_tag_to_string(Tag{});
ASSERT_EQ(r4, gold) << name << ", " << view_tag_to_string(Tag{});
#if !defined KOKKOS_ENABLE_OPENMPTARGET
CustomLessThanComparator<ValueType, ValueType> comp;
@ -160,10 +160,10 @@ void run_single_scenario(const InfoType& scenario_info) {
auto r8 = KE::is_sorted_until("label", exespace(), view, comp);
#endif
ASSERT_EQ(r1, gold);
ASSERT_EQ(r2, gold);
ASSERT_EQ(r3, gold);
ASSERT_EQ(r4, gold);
ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{});
ASSERT_EQ(r2, gold) << name << ", " << view_tag_to_string(Tag{});
ASSERT_EQ(r3, gold) << name << ", " << view_tag_to_string(Tag{});
ASSERT_EQ(r4, gold) << name << ", " << view_tag_to_string(Tag{});
Kokkos::fence();
}
@ -176,9 +176,6 @@ void run_is_sorted_until_all_scenarios() {
{"medium-a", 1003}, {"medium-b", 1003}, {"large-a", 101513},
{"large-b", 101513}};
std::cout << "is_sorted_until: " << view_tag_to_string(Tag{})
<< ", all overloads \n";
for (const auto& it : scenarios) {
run_single_scenario<Tag, ValueType>(it);
}

View File

@ -48,7 +48,7 @@ struct MyMovableType {
TEST(std_algorithms_mod_ops_test, move) {
MyMovableType a;
using move_t = decltype(std::move(a));
static_assert(std::is_rvalue_reference<move_t>::value, "");
static_assert(std::is_rvalue_reference<move_t>::value);
// move constr
MyMovableType b(std::move(a));
@ -70,7 +70,7 @@ struct StdAlgoModSeqOpsTestMove {
void operator()(const int index) const {
typename ViewType::value_type a{11};
using move_t = decltype(std::move(a));
static_assert(std::is_rvalue_reference<move_t>::value, "");
static_assert(std::is_rvalue_reference<move_t>::value);
m_view(index) = std::move(a);
}
@ -89,50 +89,6 @@ TEST(std_algorithms_mod_ops_test, move_within_parfor) {
}
}
// ------------
// swap
// ------------
TEST(std_algorithms_mod_ops_test, swap) {
{
int a = 1;
int b = 2;
KE::swap(a, b);
ASSERT_EQ(a, 2);
ASSERT_EQ(b, 1);
}
{
double a = 3.;
double b = 1.;
KE::swap(a, b);
EXPECT_DOUBLE_EQ(a, 1.);
EXPECT_DOUBLE_EQ(b, 3.);
}
}
template <class ViewType>
struct StdAlgoModSeqOpsTestSwap {
ViewType m_view;
KOKKOS_INLINE_FUNCTION
void operator()(const int index) const {
typename ViewType::value_type newval{11};
KE::swap(m_view(index), newval);
}
StdAlgoModSeqOpsTestSwap(ViewType aIn) : m_view(aIn) {}
};
TEST(std_algorithms_mod_ops_test, swap_within_parfor) {
auto a = create_view<double>(stdalgos::DynamicTag{}, 10, "a");
StdAlgoModSeqOpsTestSwap<decltype(a)> fnc(a);
Kokkos::parallel_for(a.extent(0), fnc);
auto a_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a);
for (std::size_t i = 0; i < a.extent(0); ++i) {
EXPECT_DOUBLE_EQ(a_h(0), 11.);
}
}
// ------------
// iter_swap
// ------------

View File

@ -110,11 +110,9 @@ void verify_data(const std::string& name, ResultType my_result,
ViewTypeDestFalse view_dest_false, PredType pred) {
using value_type = typename ViewTypeFrom::value_type;
static_assert(
std::is_same<value_type, typename ViewTypeDestTrue::value_type>::value,
"");
std::is_same<value_type, typename ViewTypeDestTrue::value_type>::value);
static_assert(
std::is_same<value_type, typename ViewTypeDestFalse::value_type>::value,
"");
std::is_same<value_type, typename ViewTypeDestFalse::value_type>::value);
const std::size_t ext = view_from.extent(0);

View File

@ -166,6 +166,10 @@ void run_all_scenarios() {
}
TEST(std_algorithms_copy_if_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();

View File

@ -121,7 +121,9 @@ struct TestFunctorA {
}
};
template <class LayoutTag, class ValueType>
struct InPlace {};
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
/* description:
use a rank-2 view randomly filled with values,
@ -147,9 +149,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
using space_t = Kokkos::DefaultExecutionSpace;
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
// exclusive_scan returns an iterator so to verify that it is correct
// each team stores the distance of the returned iterator from the beginning
// of the interval that team operates on and then we check that these
@ -168,12 +167,19 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
rand_pool pool(lowerBound * upperBound);
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
// use CTAD for functor
auto initValuesView =
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
TestFunctorA fnc(sourceView, sourceView, distancesView,
intraTeamSentinelView, initValuesView, binaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
} else {
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
}
// -----------------------------------------------
// run cpp-std kernel and check
@ -223,11 +229,16 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
#undef exclusive_scan
}
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
} else {
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
}
}
template <class LayoutTag, class ValueType>
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void run_all_scenarios() {
for (int numTeams : teamSizesToTest) {
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
@ -236,16 +247,24 @@ void run_all_scenarios() {
#else
for (int apiId : {0, 1}) {
#endif
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
}
}
}
}
TEST(std_algorithms_exclusive_scan_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();
run_all_scenarios<DynamicTag, double, InPlace>();
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
}
} // namespace TeamExclusiveScan

View File

@ -139,7 +139,9 @@ struct TestFunctorA {
}
};
template <class LayoutTag, class ValueType>
struct InPlace {};
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
/* description:
use a rank-2 view randomly filled with values,
@ -165,9 +167,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
using space_t = Kokkos::DefaultExecutionSpace;
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
// inclusive_scan returns an iterator so to verify that it is correct
// each team stores the distance of the returned iterator from the beginning
// of the interval that team operates on and then we check that these
@ -186,12 +185,20 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
rand_pool pool(lowerBound * upperBound);
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
// use CTAD for functor
auto initValuesView =
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
TestFunctorA fnc(sourceView, sourceView, distancesView,
intraTeamSentinelView, initValuesView, binaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
} else {
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
}
// -----------------------------------------------
// run cpp-std kernel and check
@ -251,25 +258,38 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
#undef inclusive_scan
}
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
} else {
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
}
}
template <class LayoutTag, class ValueType>
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void run_all_scenarios() {
for (int numTeams : teamSizesToTest) {
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
for (int apiId : {0, 1, 2, 3, 4, 5}) {
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
}
}
}
}
TEST(std_algorithms_inclusive_scan_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();
run_all_scenarios<DynamicTag, double, InPlace>();
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
}
} // namespace TeamInclusiveScan

View File

@ -212,6 +212,10 @@ void run_all_scenarios() {
}
TEST(std_algorithms_remove_copy_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();

View File

@ -168,6 +168,10 @@ void run_all_scenarios() {
}
TEST(std_algorithms_remove_copy_if_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();

View File

@ -108,7 +108,9 @@ struct TestFunctorA {
}
};
template <class LayoutTag, class ValueType>
struct InPlace {};
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
/* description:
use a rank-2 view randomly filled with values,
@ -134,9 +136,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
using space_t = Kokkos::DefaultExecutionSpace;
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
// tranform_exclusive_scan returns an iterator so to verify that it is correct
// each team stores the distance of the returned iterator from the beginning
// of the interval that team operates on and then we check that these
@ -156,12 +155,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
rand_pool pool(lowerBound * upperBound);
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
// use CTAD for functor
auto initValuesView =
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, unaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
TestFunctorA fnc(sourceView, sourceView, distancesView,
intraTeamSentinelView, initValuesView, binaryOp, unaryOp,
apiId);
Kokkos::parallel_for(policy, fnc);
} else {
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, unaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
}
// -----------------------------------------------
// run cpp-std kernel and check
@ -200,16 +208,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
#undef transform_exclusive_scan
}
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
} else {
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
}
}
template <class LayoutTag, class ValueType>
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void run_all_scenarios() {
for (int numTeams : teamSizesToTest) {
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
for (int apiId : {0, 1}) {
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
}
}
}
@ -219,6 +232,10 @@ TEST(std_algorithms_transform_exclusive_scan_team_test, test) {
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();
run_all_scenarios<DynamicTag, double, InPlace>();
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
}
} // namespace TeamTransformExclusiveScan

View File

@ -131,7 +131,9 @@ struct TestFunctorA {
}
};
template <class LayoutTag, class ValueType>
struct InPlace {};
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
/* description:
use a rank-2 view randomly filled with values,
@ -157,9 +159,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
using space_t = Kokkos::DefaultExecutionSpace;
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
// tranform_inclusive_scan returns an iterator so to verify that it is correct
// each team stores the distance of the returned iterator from the beginning
// of the interval that team operates on and then we check that these
@ -179,12 +178,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
rand_pool pool(lowerBound * upperBound);
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
// use CTAD for functor
auto initValuesView =
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, unaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
// create the destination view
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
TestFunctorA fnc(sourceView, sourceView, distancesView,
intraTeamSentinelView, initValuesView, binaryOp, unaryOp,
apiId);
Kokkos::parallel_for(policy, fnc);
} else {
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
initValuesView, binaryOp, unaryOp, apiId);
Kokkos::parallel_for(policy, fnc);
}
// -----------------------------------------------
// run cpp-std kernel and check
@ -236,16 +244,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
}
#undef transform_inclusive_scan
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
} else {
auto dataViewAfterOp_h = create_host_space_copy(destView);
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
}
}
template <class LayoutTag, class ValueType>
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
void run_all_scenarios() {
for (int numTeams : teamSizesToTest) {
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
for (int apiId : {0, 1, 2, 3}) {
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
}
}
}
@ -255,6 +268,10 @@ TEST(std_algorithms_transform_inclusive_scan_team_test, test) {
run_all_scenarios<DynamicTag, double>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, unsigned>();
run_all_scenarios<DynamicTag, double, InPlace>();
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
}
} // namespace TeamTransformInclusiveScan

View File

@ -186,6 +186,10 @@ void run_all_scenarios() {
}
TEST(std_algorithms_unique_copy_team_test, test) {
// FIXME_OPENMPTARGET
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
#endif
run_all_scenarios<DynamicTag, int>();
run_all_scenarios<StridedTwoRowsTag, int>();
run_all_scenarios<StridedThreeRowsTag, int>();

View File

@ -16,6 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp>
#include <utility>
#include <iomanip>
namespace Test {
namespace stdalgos {
@ -160,24 +161,15 @@ void verify_data(ViewType1 data_view, // contains data
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
if (test_view_h.extent(0) > 0) {
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
// << gold_h(i) << " " << test_view_h(i) << " "
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
if (std::is_same<gold_view_value_type, int>::value) {
ASSERT_EQ(gold_h(i), test_view_h(i));
} else {
const auto error = std::abs(gold_h(i) - test_view_h(i));
if (error > 1e-10) {
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
<< " " << gold_h(i) << " " << test_view_h(i) << " "
<< std::abs(gold_h(i) - test_view_h(i)) << std::endl;
}
EXPECT_LT(error, 1e-10);
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
<< static_cast<double>(test_view_h(i)) << " "
<< static_cast<double>(gold_h(i));
}
}
// std::cout << " last el: " << test_view_h(test_view_h.extent(0)-1) <<
// std::endl;
}
}
@ -205,17 +197,13 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
BinaryOp bop, UnaryOp uop) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// std::cout << "transform_exclusive_scan custom op: " << name << ", "
// << view_tag_to_string(Tag{}) << ", "
// << value_type_to_string(ValueType()) << ", "
// << "init = " << init_value << std::endl;
auto view_dest =
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan");
auto view_from =
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan");
auto view_from = create_view<ValueType>(Tag{}, view_ext,
"transform_exclusive_scan_view_from");
fill_view(view_from, name);
auto view_dest = create_view<ValueType>(Tag{}, view_ext,
"transform_exclusive_scan_view_dest");
{
fill_zero(view_dest);
auto r = KE::transform_exclusive_scan(
@ -253,6 +241,65 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
Kokkos::fence();
}
template <class Tag, class ValueType, class InfoType, class BinaryOp,
class UnaryOp>
void run_single_scenario_inplace(const InfoType& scenario_info,
ValueType init_value, BinaryOp bop,
UnaryOp uop) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// since here we call the in-place operation, we need to use two views:
// view1: filled according to what the scenario asks for and is not modified
// view2: filled according to what the scenario asks for and used for the
// in-place op Therefore, after the op is done, view2 should contain the
// result of doing exclusive scan NOTE: view2 is filled below every time
// because the algorithm acts in place
auto view1 =
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan_view1");
fill_view(view1, name);
auto view2 =
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan_view2");
{
fill_view(view2, name);
auto r = KE::transform_exclusive_scan(exespace(), KE::cbegin(view2),
KE::cend(view2), KE::begin(view2),
init_value, bop, uop);
ASSERT_EQ(r, KE::end(view2));
verify_data(view1, view2, init_value, bop, uop);
}
{
fill_view(view2, name);
auto r = KE::transform_exclusive_scan(
"label", exespace(), KE::cbegin(view2), KE::cend(view2),
KE::begin(view2), init_value, bop, uop);
ASSERT_EQ(r, KE::end(view2));
verify_data(view1, view2, init_value, bop, uop);
}
{
fill_view(view2, name);
auto r = KE::transform_exclusive_scan(exespace(), view2, view2, init_value,
bop, uop);
ASSERT_EQ(r, KE::end(view2));
verify_data(view1, view2, init_value, bop, uop);
}
{
fill_view(view2, name);
auto r = KE::transform_exclusive_scan("label", exespace(), view2, view2,
init_value, bop, uop);
ASSERT_EQ(r, KE::end(view2));
verify_data(view1, view2, init_value, bop, uop);
}
Kokkos::fence();
}
template <class Tag, class ValueType>
void run_all_scenarios() {
const std::map<std::string, std::size_t> scenarios = {
@ -267,6 +314,11 @@ void run_all_scenarios() {
run_single_scenario<Tag, ValueType>(it, ValueType{1}, bop_t(), uop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{-2}, bop_t(), uop_t());
run_single_scenario<Tag, ValueType>(it, ValueType{3}, bop_t(), uop_t());
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0}, bop_t(),
uop_t());
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2}, bop_t(),
uop_t());
}
}

View File

@ -16,6 +16,7 @@
#include <TestStdAlgorithmsCommon.hpp>
#include <utility>
#include <iomanip>
namespace Test {
namespace stdalgos {
@ -172,24 +173,15 @@ void verify_data(ViewType1 data_view, // contains data
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
if (test_view_h.extent(0) > 0) {
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
// << gold_h(i) << " " << test_view_h(i) << " "
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
if (std::is_same<gold_view_value_type, int>::value) {
ASSERT_EQ(gold_h(i), test_view_h(i));
} else {
const auto error = std::abs(gold_h(i) - test_view_h(i));
if (error > 1e-10) {
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
<< " " << gold_h(i) << " " << test_view_h(i) << " "
<< std::abs(gold_h(i) - test_view_h(i)) << std::endl;
}
EXPECT_LT(error, 1e-10);
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
<< static_cast<double>(test_view_h(i)) << " "
<< static_cast<double>(gold_h(i));
}
}
// std::cout << " last el: " << test_view_h(test_view_h.extent(0)-1) <<
// std::endl;
}
}
@ -210,30 +202,11 @@ struct SumBinaryFunctor {
std::string value_type_to_string(int) { return "int"; }
std::string value_type_to_string(double) { return "double"; }
template <class Tag, class BopT, class UopT>
void print_scenario_details(const std::string& name, BopT bop, UopT uop) {
(void)bop;
(void)uop;
std::cout << "transform_inclusive_scan: " << name << ", "
<< view_tag_to_string(Tag{}) << std::endl;
}
template <class Tag, class BopT, class UopT, class ValueType>
void print_scenario_details(const std::string& name, BopT bop, UopT uop,
ValueType init_value) {
(void)bop;
(void)uop;
std::cout << "transform_inclusive_scan: " << name << ", "
<< view_tag_to_string(Tag{}) << ", "
<< "init = " << init_value << std::endl;
}
template <class Tag, class ValueType, class InfoType, class... Args>
void run_single_scenario(const InfoType& scenario_info,
Args... args /* by value on purpose*/) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// print_scenario_details<Tag>(name, args...);
auto view_dest =
create_view<ValueType>(Tag{}, view_ext, "transform_inclusive_scan");
@ -278,6 +251,63 @@ void run_single_scenario(const InfoType& scenario_info,
Kokkos::fence();
}
template <class Tag, class ValueType, class InfoType, class... Args>
void run_single_scenario_inplace(const InfoType& scenario_info,
Args... args /* by value on purpose*/) {
const auto name = std::get<0>(scenario_info);
const std::size_t view_ext = std::get<1>(scenario_info);
// since here we call the in-place operation, we need to use two views:
// view1: filled according to scenario and is not modified
// view2: filled according scenario and used for the in-place op
// Therefore, after the op is done, view_2 should contain the
// result of doing exclusive scan.
// NOTE: view2 must be filled before every call to the algorithm
// because the algorithm acts in place
auto view_1 = create_view<ValueType>(Tag{}, view_ext,
"transform_inclusive_scan_view_1");
fill_view(view_1, name);
auto view_2 = create_view<ValueType>(Tag{}, view_ext,
"transform_inclusive_scan_view_2");
{
fill_view(view_2, name);
auto r = KE::transform_inclusive_scan(exespace(), KE::cbegin(view_2),
KE::cend(view_2), KE::begin(view_2),
args...);
ASSERT_EQ(r, KE::end(view_2));
verify_data(view_1, view_2, args...);
}
{
fill_view(view_2, name);
auto r = KE::transform_inclusive_scan("label", exespace(),
KE::cbegin(view_2), KE::cend(view_2),
KE::begin(view_2), args...);
ASSERT_EQ(r, KE::end(view_2));
verify_data(view_1, view_2, args...);
}
{
fill_view(view_2, name);
auto r = KE::transform_inclusive_scan(exespace(), view_2, view_2, args...);
ASSERT_EQ(r, KE::end(view_2));
verify_data(view_1, view_2, args...);
}
{
fill_view(view_2, name);
auto r = KE::transform_inclusive_scan("label", exespace(), view_2, view_2,
args...);
ASSERT_EQ(r, KE::end(view_2));
verify_data(view_1, view_2, args...);
}
Kokkos::fence();
}
template <class Tag, class ValueType>
void run_all_scenarios() {
const std::map<std::string, std::size_t> scenarios = {
@ -294,15 +324,23 @@ void run_all_scenarios() {
run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{2});
run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{-1});
run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{-2});
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t());
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t(),
ValueType{0});
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t(),
ValueType{2});
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t(),
ValueType{-2});
}
}
#if !defined KOKKOS_ENABLE_OPENMPTARGET
TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan) {
run_all_scenarios<DynamicTag, double>();
// run_all_scenarios<StridedThreeTag, double>();
// run_all_scenarios<DynamicTag, int>();
// run_all_scenarios<StridedThreeTag, int>();
run_all_scenarios<StridedThreeTag, double>();
run_all_scenarios<DynamicTag, int>();
run_all_scenarios<StridedThreeTag, int>();
}
#endif

View File

@ -83,9 +83,6 @@ auto run_min_or_max_test(ViewType view, StdReducersTestEnumOrder enValue) {
static_assert(std::is_same<ExeSpace, Kokkos::HostSpace>::value,
"test is only enabled for HostSpace");
std::cout << "checking reduction with order: " << order_to_string(enValue)
<< "\n";
using view_value_type = typename ViewType::value_type;
using reducer_type = std::conditional_t<
(flag == 0), Kokkos::MaxFirstLoc<view_value_type, IndexType, ExeSpace>,
@ -132,18 +129,24 @@ TEST(std_algorithms_reducers, max_first_loc) {
const auto pair1 = run_min_or_max_test<0, hostspace, index_type>(
view_h, StdReducersTestEnumOrder::LeftToRight);
ASSERT_EQ(pair1.first, gold_value);
ASSERT_EQ(pair1.second, gold_location);
ASSERT_EQ(pair1.first, gold_value)
<< order_to_string(StdReducersTestEnumOrder::LeftToRight);
ASSERT_EQ(pair1.second, gold_location)
<< order_to_string(StdReducersTestEnumOrder::LeftToRight);
const auto pair2 = run_min_or_max_test<0, hostspace, index_type>(
view_h, StdReducersTestEnumOrder::RightToLeft);
ASSERT_EQ(pair2.first, gold_value);
ASSERT_EQ(pair2.second, gold_location);
ASSERT_EQ(pair2.first, gold_value)
<< order_to_string(StdReducersTestEnumOrder::RightToLeft);
ASSERT_EQ(pair2.second, gold_location)
<< order_to_string(StdReducersTestEnumOrder::RightToLeft);
const auto pair3 = run_min_or_max_test<0, hostspace, index_type>(
view_h, StdReducersTestEnumOrder::Random);
ASSERT_EQ(pair3.first, gold_value);
ASSERT_EQ(pair3.second, gold_location);
ASSERT_EQ(pair3.first, gold_value)
<< order_to_string(StdReducersTestEnumOrder::Random);
ASSERT_EQ(pair3.second, gold_location)
<< order_to_string(StdReducersTestEnumOrder::Random);
}
TEST(std_algorithms_reducers, min_first_loc) {
@ -191,9 +194,6 @@ void run_min_max_test(ViewType view, StdReducersTestEnumOrder enValue,
static_assert(std::is_same<ExeSpace, Kokkos::HostSpace>::value,
"test is only enabled for HostSpace");
std::cout << "checking reduction with order: " << order_to_string(enValue)
<< "\n";
using view_value_type = typename ViewType::value_type;
using reducer_type =
Kokkos::MinMaxFirstLastLoc<view_value_type, IndexType, ExeSpace>;
@ -212,10 +212,10 @@ void run_min_max_test(ViewType view, StdReducersTestEnumOrder enValue,
reduction_value_type{view(index), view(index), index, index});
}
ASSERT_EQ(red_result.min_val, gold_values.first);
ASSERT_EQ(red_result.max_val, gold_values.second);
ASSERT_EQ(red_result.min_loc, gold_locs.first);
ASSERT_EQ(red_result.max_loc, gold_locs.second);
ASSERT_EQ(red_result.min_val, gold_values.first) << order_to_string(enValue);
ASSERT_EQ(red_result.max_val, gold_values.second) << order_to_string(enValue);
ASSERT_EQ(red_result.min_loc, gold_locs.first) << order_to_string(enValue);
ASSERT_EQ(red_result.max_loc, gold_locs.second) << order_to_string(enValue);
}
TEST(std_algorithms_reducers, min_max_first_last_loc) {

View File

@ -1 +1,12 @@
#FIXME_OPENMPTARGET - compiling in debug mode causes ICE.
KOKKOS_ADD_BENCHMARK_DIRECTORIES(atomic)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(gather)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(gups)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(launch_latency)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(stream)
#FIXME_OPENMPTARGET - These two benchmarks cause ICE. Commenting them for now but a deeper analysis on the cause and a possible fix will follow.
IF(NOT Kokkos_ENABLE_OPENMPTARGET)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(policy_performance)
KOKKOS_ADD_BENCHMARK_DIRECTORIES(bytes_and_flops)
ENDIF()

View File

@ -0,0 +1,4 @@
KOKKOS_ADD_EXECUTABLE(
atomic
SOURCES main.cpp
)

View File

@ -0,0 +1,4 @@
KOKKOS_ADD_EXECUTABLE(
bytes_and_flops
SOURCES bench_double.cpp bench_float.cpp bench_int32_t.cpp bench_int64_t.cpp main.cpp
)

View File

@ -37,22 +37,22 @@ struct RunStride {
};
#define STRIDE 1
#include <bench_stride.hpp>
#include "bench_stride.hpp"
#undef STRIDE
#define STRIDE 2
#include <bench_stride.hpp>
#include "bench_stride.hpp"
#undef STRIDE
#define STRIDE 4
#include <bench_stride.hpp>
#include "bench_stride.hpp"
#undef STRIDE
#define STRIDE 8
#include <bench_stride.hpp>
#include "bench_stride.hpp"
#undef STRIDE
#define STRIDE 16
#include <bench_stride.hpp>
#include "bench_stride.hpp"
#undef STRIDE
#define STRIDE 32
#include <bench_stride.hpp>
#include "bench_stride.hpp"
#undef STRIDE
template <class Scalar>

View File

@ -14,7 +14,7 @@
//
//@HEADER
#include <bench.hpp>
#include "bench.hpp"
template void run_stride_unroll<double>(int N, int K, int R, int D, int U,
int F, int T, int S, int B, int I);

View File

@ -14,7 +14,7 @@
//
//@HEADER
#include <bench.hpp>
#include "bench.hpp"
template void run_stride_unroll<float>(int N, int K, int R, int D, int U, int F,
int T, int S, int B, int I);

View File

@ -14,7 +14,7 @@
//
//@HEADER
#include <bench.hpp>
#include "bench.hpp"
template void run_stride_unroll<int32_t>(int N, int K, int R, int D, int U,
int F, int T, int S, int B, int I);

View File

@ -14,7 +14,7 @@
//
//@HEADER
#include <bench.hpp>
#include "bench.hpp"
template void run_stride_unroll<int64_t>(int N, int K, int R, int D, int U,
int F, int T, int S, int B, int I);

View File

@ -15,28 +15,28 @@
//@HEADER
#define UNROLL 1
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 2
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 3
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 4
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 5
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 6
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 7
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
#define UNROLL 8
#include <bench_unroll_stride.hpp>
#include "bench_unroll_stride.hpp"
#undef UNROLL
template <class Scalar>

View File

@ -26,7 +26,7 @@ struct Run<Scalar, UNROLL, STRIDE> {
Kokkos::deep_copy(C, Scalar(3.5));
Kokkos::Timer timer;
for (int i = 0; i < I; ++i) {
for (int iter = 0; iter < I; ++iter) {
Kokkos::parallel_for(
"BenchmarkKernel",
Kokkos::TeamPolicy<>(N, T).set_scratch_size(0, Kokkos::PerTeam(S)),

View File

@ -16,7 +16,7 @@
#include <Kokkos_Core.hpp>
#include <Kokkos_Timer.hpp>
#include <bench.hpp>
#include "bench.hpp"
#include <cstdlib>
extern template void run_stride_unroll<float>(int, int, int, int, int, int, int,
@ -86,7 +86,7 @@ int main(int argc, char* argv[]) {
printf("D must be one of 1,2,4,8,16,32\n");
return 0;
}
if ((P < 1) && (P > 2)) {
if ((P < 1) || (P > 4)) {
printf("P must be one of 1,2,3,4\n");
return 0;
}

View File

@ -0,0 +1,4 @@
KOKKOS_ADD_EXECUTABLE(
gather
SOURCES main.cpp
)

View File

@ -20,28 +20,28 @@ struct RunGather {
};
#define UNROLL 1
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 2
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 3
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 4
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 5
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 6
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 7
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
#define UNROLL 8
#include <gather_unroll.hpp>
#include "gather_unroll.hpp"
#undef UNROLL
template <class Scalar>

View File

@ -138,7 +138,7 @@ struct RunGather<Scalar, UNROLL> {
printf(
"SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: "
"%lf GGather/s: %lf\n",
sizeof(Scalar) / 4, N, K, D, R, UNROLL, F, seconds,
static_cast<int>(sizeof(Scalar) / 4), N, K, D, R, UNROLL, F, seconds,
1.0 * bytes / seconds / 1024 / 1024 / 1024, 1.e-9 * flops / seconds,
1.e-9 * gather_ops / seconds);
}

View File

@ -16,7 +16,7 @@
#include <Kokkos_Core.hpp>
#include <Kokkos_Timer.hpp>
#include <gather.hpp>
#include "gather.hpp"
#include <cstdlib>
int main(int argc, char* argv[]) {

View File

@ -0,0 +1,4 @@
KOKKOS_ADD_EXECUTABLE(
launch_latency
SOURCES launch_latency.cpp
)

View File

@ -0,0 +1,283 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
/*! \file launch_latency.cpp
Tests of parallel_for and parallel_reduce latency for different
circumstances.
Three launch kinds are tested: parallel_for, parallel_reduce into scalar,
and parallel_reduce into view
N controls how large the parallel loops is
V controls how large the functor is
M controls across how many launches the latency is averaged
K controls how larege the nested loop is (no larger than V)
For each launch kind,
1. Avg functor dispatch latency: (time to do M launches) / M
2. Avg functor completion throughput: (M launches + sync) / M
3. Avg functor completion latency: (M (launch + sync)) / M
*/
#include <Kokkos_Core.hpp>
template <int V>
struct TestFunctor {
double values[V];
Kokkos::View<double*> a;
int K;
TestFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
for (int j = 0; j < K; j++) a(i) += 1.0 * i * values[j];
}
};
template <int V>
struct TestRFunctor {
double values[V];
Kokkos::View<double*> a;
int K;
TestRFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
KOKKOS_INLINE_FUNCTION
void operator()(const int i, double& lsum) const {
for (int j = 0; j < K; j++) a(i) += 1.0 * i * values[j];
lsum += a(i);
}
};
struct Opts {
bool par_for = true;
bool par_reduce = true;
bool par_reduce_view = true;
};
template <int V>
void run(int N, int M, int K, const Opts& opts) {
std::string l_no_fence, l_fence, l_red_no_fence, l_red_fence,
l_red_view_no_fence, l_red_view_fence;
{
std::ostringstream ostream;
ostream << "RunNoFence_" << N << "_" << K << std::endl;
l_no_fence = ostream.str();
}
{
std::ostringstream ostream;
ostream << "RunFence_" << N << "_" << K << std::endl;
l_fence = ostream.str();
}
{
std::ostringstream ostream;
ostream << "RunReduceNoFence_" << N << "_" << K << std::endl;
l_red_no_fence = ostream.str();
}
{
std::ostringstream ostream;
ostream << "RunReduceFence_" << N << "_" << K << std::endl;
l_red_fence = ostream.str();
}
{
std::ostringstream ostream;
ostream << "RunReduceViewNoFence_" << N << "_" << K << std::endl;
l_red_view_no_fence = ostream.str();
}
{
std::ostringstream ostream;
ostream << "RunReduceViewFence_" << N << "_" << K << std::endl;
l_red_view_fence = ostream.str();
}
double result;
Kokkos::View<double*> a("A", N);
Kokkos::View<double> v_result("result");
TestFunctor<V> f(a, K);
TestRFunctor<V> rf(a, K);
Kokkos::Timer timer;
// initialize to an obviously wrong value
double time_no_fence = -1; // launch loop
double time_no_fence_fenced = -1; // launch loop then fence
double time_fence = -1; // launch&fence loop
double time_red_no_fence = -1;
double time_red_no_fence_fenced = -1;
double time_red_fence = -1;
double time_red_view_no_fence = -1;
double time_red_view_no_fence_fenced = -1;
double time_red_view_fence = -1;
if (opts.par_for) {
// warmup
for (int i = 0; i < 4; ++i) {
Kokkos::parallel_for(l_no_fence, N, f);
}
Kokkos::fence();
timer.reset();
for (int i = 0; i < M; i++) {
Kokkos::parallel_for(l_no_fence, N, f);
}
time_no_fence = timer.seconds();
Kokkos::fence();
time_no_fence_fenced = timer.seconds();
timer.reset();
for (int i = 0; i < M; i++) {
Kokkos::parallel_for(l_fence, N, f);
Kokkos::fence();
}
time_fence = timer.seconds();
}
if (opts.par_reduce) {
// warmup
for (int i = 0; i < 4; ++i) {
Kokkos::parallel_reduce(l_red_no_fence, N, rf, result);
}
Kokkos::fence();
timer.reset();
for (int i = 0; i < M; i++) {
Kokkos::parallel_reduce(l_red_no_fence, N, rf, result);
}
time_red_no_fence = timer.seconds();
Kokkos::fence();
time_red_no_fence_fenced = timer.seconds();
timer.reset();
for (int i = 0; i < M; i++) {
Kokkos::parallel_reduce(l_red_fence, N, rf, result);
Kokkos::fence();
}
time_red_fence = timer.seconds();
Kokkos::fence();
}
if (opts.par_reduce_view) {
// warmup
for (int i = 0; i < 4; ++i) {
Kokkos::parallel_reduce(l_red_view_no_fence, N, rf, v_result);
}
Kokkos::fence();
timer.reset();
for (int i = 0; i < M; i++) {
Kokkos::parallel_reduce(l_red_view_no_fence, N, rf, v_result);
}
time_red_view_no_fence = timer.seconds();
Kokkos::fence();
time_red_view_no_fence_fenced = timer.seconds();
timer.reset();
for (int i = 0; i < M; i++) {
Kokkos::parallel_reduce(l_red_view_fence, N, rf, v_result);
Kokkos::fence();
}
time_red_view_fence = timer.seconds();
Kokkos::fence();
timer.reset();
}
const double x = 1.e6 / M;
printf("%i %i %i %i", N, V, K, M);
if (opts.par_for) {
printf(" parallel_for: %lf %lf ( %lf )", x * time_no_fence, x * time_fence,
x * time_no_fence_fenced);
}
if (opts.par_reduce) {
printf(" parallel_reduce: %lf %lf ( %lf )", x * time_red_no_fence,
x * time_red_fence, x * time_red_no_fence_fenced);
}
if (opts.par_reduce_view) {
printf(" parallel_reduce(view): %lf %lf ( %lf )",
x * time_red_view_no_fence, x * time_red_view_fence,
x * time_red_view_no_fence_fenced);
}
printf("\n");
}
int main(int argc, char* argv[]) {
Kokkos::initialize(argc, argv);
{
int N = 10000;
int M = 20;
int K = 1;
Opts opts;
printf("==========================\n");
printf("Kokkos Launch Latency Test\n");
printf("==========================\n");
printf("\n");
printf("Usage: %s ARGUMENTS [OPTIONS...]\n\n", argv[0]);
printf("Arguments: N M K\n");
printf(" N: loop length\n");
printf(" M: how many kernels to dispatch\n");
printf(
" K: nested loop length (capped by size of functor member array\n\n");
printf("Options:\n");
printf(" --no-parallel-for: skip parallel_for benchmark\n");
printf(" --no-parallel-reduce: skip parallel_reduce benchmark\n");
printf(
" --no-parallel-reduce-view: skip parallel_reduce into view "
"benchmark\n");
printf("\n\n");
printf(" Output V is the size of the functor member array\n");
printf("\n\n");
for (int i = 1; i < argc; ++i) {
const std::string_view arg(argv[i]);
// anything that doesn't start with --
if (arg.size() < 2 ||
(arg.size() >= 2 && arg[0] != '-' && arg[1] != '-')) {
if (i == 1)
N = atoi(arg.data());
else if (i == 2)
M = atoi(arg.data());
else if (i == 3)
K = atoi(arg.data());
else {
throw std::runtime_error("unexpected argument!");
}
} else if (arg == "--no-parallel-for") {
opts.par_for = false;
} else if (arg == "--no-parallel-reduce") {
opts.par_reduce = false;
} else if (arg == "--no-parallel-reduce-view") {
opts.par_reduce_view = false;
} else {
std::stringstream ss;
ss << "unexpected argument \"" << arg << "\" at position " << i;
throw std::runtime_error(ss.str());
}
}
printf("N V K M time_no_fence time_fence (time_no_fence_fenced)\n");
/* A backend may have different launch strategies for functors of different
* sizes: test a variety of functor sizes.*/
run<1>(N, M, K <= 1 ? K : 1, opts);
run<16>(N, M, K <= 16 ? K : 16, opts);
run<200>(N, M, K <= 200 ? K : 200, opts);
run<3000>(N, M, K <= 3000 ? K : 3000, opts);
run<30000>(N, M, K <= 30000 ? K : 30000, opts);
}
Kokkos::finalize();
}

View File

@ -0,0 +1,4 @@
KOKKOS_ADD_EXECUTABLE(
policy_performance
SOURCES main.cpp
)

View File

@ -106,8 +106,9 @@ int main(int argc, char* argv[]) {
Kokkos::parallel_reduce(
"parallel_reduce warmup", Kokkos::TeamPolicy<>(10, 1),
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type team,
double& lval) { lval += 1; },
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type&, double& lval) {
lval += 1;
},
result);
using view_type_1d = Kokkos::View<double*, Kokkos::LayoutRight>;

View File

@ -21,13 +21,13 @@ struct ParallelScanFunctor {
using value_type = double;
ViewType v;
ParallelScanFunctor(const ViewType& v_) : v(v_) {}
explicit ParallelScanFunctor(const ViewType& v_) : v(v_) {}
KOKKOS_INLINE_FUNCTION
void operator()(const int idx, value_type& val, const bool& final) const {
void operator()(const int idx, value_type& val, const bool& is_final) const {
// inclusive scan
val += v(idx);
if (final) {
if (is_final) {
v(idx) = val;
}
}
@ -109,7 +109,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
vector_result = 0.0;
Kokkos::parallel_reduce(
Kokkos::ThreadVectorRange(team, vector_range),
[&](const int vi, double& vval) { vval += 1; },
[&](const int, double& vval) { vval += 1; },
vector_result);
}
v2(idx, t) = vector_result;
@ -128,7 +128,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
team_result = 0.0;
Kokkos::parallel_reduce(
Kokkos::TeamThreadRange(team, thread_range),
[&](const int t, double& lval) { lval += 1; }, team_result);
[&](const int, double& lval) { lval += 1; }, team_result);
}
v1(idx) = team_result;
// prevent compiler optimizing loop away
@ -170,13 +170,13 @@ void test_policy(int team_range, int thread_range, int vector_range,
for (int tr = 0; tr < thread_repeat; ++tr) {
Kokkos::parallel_reduce(
Kokkos::TeamThreadRange(team, thread_range),
[&](const int t, double& lval) {
[&](const int, double& lval) {
double vector_result = 0.0;
for (int vr = 0; vr < inner_repeat; ++vr) {
vector_result = 0.0;
Kokkos::parallel_reduce(
Kokkos::ThreadVectorRange(team, vector_range),
[&](const int vi, double& vval) { vval += 1; },
[&](const int, double& vval) { vval += 1; },
vector_result);
lval += vector_result;
}

Some files were not shown because too many files have changed in this diff Show More