Merge branch 'lammps:develop' into ml-uf3
This commit is contained in:
2
.github/CODEOWNERS
vendored
2
.github/CODEOWNERS
vendored
@ -84,7 +84,7 @@ src/bond.* @sjplimp
|
||||
src/comm*.* @sjplimp
|
||||
src/compute.* @sjplimp
|
||||
src/dihedral.* @sjplimp
|
||||
src/domain.* @sjplimp
|
||||
src/domain.* @sjplimp @stanmoore1
|
||||
src/dump*.* @sjplimp
|
||||
src/error.* @sjplimp
|
||||
src/finish.* @sjplimp
|
||||
|
||||
@ -45,8 +45,8 @@ if(DOWNLOAD_KOKKOS)
|
||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
|
||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
|
||||
include(ExternalProject)
|
||||
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.2.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
||||
set(KOKKOS_MD5 "16b9b09ae947d434dfb58fc5c87c2b76" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
||||
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
||||
set(KOKKOS_MD5 "889dcea2b5ced3debdc5b0820044bdc4" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
||||
mark_as_advanced(KOKKOS_URL)
|
||||
mark_as_advanced(KOKKOS_MD5)
|
||||
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
|
||||
@ -71,7 +71,7 @@ if(DOWNLOAD_KOKKOS)
|
||||
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
|
||||
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
|
||||
elseif(EXTERNAL_KOKKOS)
|
||||
find_package(Kokkos 4.2.01 REQUIRED CONFIG)
|
||||
find_package(Kokkos 4.3.00 REQUIRED CONFIG)
|
||||
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
|
||||
else()
|
||||
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
|
||||
|
||||
@ -533,9 +533,6 @@ They must be specified in uppercase.
|
||||
* - A64FX
|
||||
- HOST
|
||||
- ARMv8.2 with SVE Support
|
||||
* - WSM
|
||||
- HOST
|
||||
- Intel Westmere CPU (SSE 4.2)
|
||||
* - SNB
|
||||
- HOST
|
||||
- Intel Sandy/Ivy Bridge CPU (AVX 1)
|
||||
@ -566,18 +563,15 @@ They must be specified in uppercase.
|
||||
* - KNL
|
||||
- HOST
|
||||
- Intel Knights Landing Xeon Phi
|
||||
* - BGQ
|
||||
- HOST
|
||||
- IBM Blue Gene/Q CPU
|
||||
* - POWER7
|
||||
- HOST
|
||||
- IBM POWER7 CPU
|
||||
* - POWER8
|
||||
- HOST
|
||||
- IBM POWER8 CPU
|
||||
* - POWER9
|
||||
- HOST
|
||||
- IBM POWER9 CPU
|
||||
* - RISCV_SG2042
|
||||
- HOST
|
||||
- SG2042 (RISC-V) CPU
|
||||
* - KEPLER30
|
||||
- GPU
|
||||
- NVIDIA Kepler generation CC 3.0 GPU
|
||||
@ -666,7 +660,7 @@ They must be specified in uppercase.
|
||||
- GPU
|
||||
- Intel GPU Ponte Vecchio
|
||||
|
||||
This list was last updated for version 4.2 of the Kokkos library.
|
||||
This list was last updated for version 4.3.0 of the Kokkos library.
|
||||
|
||||
.. tabs::
|
||||
|
||||
|
||||
@ -245,6 +245,7 @@ OPT.
|
||||
* :doc:`oxrna2/coaxstk <pair_oxrna2>`
|
||||
* :doc:`pace (k) <pair_pace>`
|
||||
* :doc:`pace/extrapolation (k) <pair_pace>`
|
||||
* :doc:`pedone (o) <pair_pedone>`
|
||||
* :doc:`pod <pair_pod>`
|
||||
* :doc:`peri/eps <pair_peri>`
|
||||
* :doc:`peri/lps (o) <pair_peri>`
|
||||
|
||||
@ -635,10 +635,10 @@ Tohoku University (under MIT license)
|
||||
|
||||
----------
|
||||
|
||||
.. doxygenfunction:: MathEigen::jacobi3(double const *const *mat, double *eval, double **evec)
|
||||
.. doxygenfunction:: MathEigen::jacobi3(double const *const *mat, double *eval, double **evec, int sort)
|
||||
:project: progguide
|
||||
|
||||
.. doxygenfunction:: MathEigen::jacobi3(double const mat[3][3], double *eval, double evec[3][3])
|
||||
.. doxygenfunction:: MathEigen::jacobi3(double const mat[3][3], double *eval, double evec[3][3], int sort)
|
||||
:project: progguide
|
||||
|
||||
---------------------------
|
||||
|
||||
@ -13,15 +13,44 @@ discussions of such cases.
|
||||
Unknown identifier in data file
|
||||
-------------------------------
|
||||
|
||||
This error happens when LAMMPS encounters a line of text in an unexpected format
|
||||
while reading a data file. This is most commonly caused by inconsistent header and
|
||||
section data. The header section informs LAMMPS how many entries or lines are expected in the
|
||||
various sections (like Atoms, Masses, Pair Coeffs, *etc.*\ ) of the data file.
|
||||
If there is a mismatch, LAMMPS will either keep reading beyond the end of a section
|
||||
or stop reading before the section has ended.
|
||||
This error happens when LAMMPS encounters a line of text with an
|
||||
unexpected keyword while :doc:`reading a data file <read_data>`. This
|
||||
would be either header keywords or section header keywords. This is
|
||||
most commonly due to a mistyped keyword or due to a keyword that is
|
||||
inconsistent with the :doc:`atom style <atom_style>` used.
|
||||
|
||||
Such a mismatch can happen unexpectedly when the first line of the data
|
||||
is *not* a comment as required by the format. That would result in
|
||||
LAMMPS expecting, for instance, 0 atoms because the "atoms" header line
|
||||
is treated as a comment.
|
||||
The header section informs LAMMPS how many entries or lines are expected
|
||||
in the various sections (like Atoms, Masses, Pair Coeffs, *etc.*\ ) of
|
||||
the data file. If there is a mismatch, LAMMPS will either keep reading
|
||||
beyond the end of a section or stop reading before the section has
|
||||
ended. In that case the next line will not contain a recognized keyword.
|
||||
|
||||
Such a mismatch can also happen when the first line of the data
|
||||
is *not* a comment as required by the format, but a line with a valid
|
||||
header keyword. That would result in LAMMPS expecting, for instance,
|
||||
0 atoms because the "atoms" header line is the first line and thus
|
||||
treated as a comment.
|
||||
|
||||
Another possibility to trigger this error is to have a keyword in the
|
||||
data file that corresponds to a fix (e.g. :doc:`fix cmap <fix_cmap>`)
|
||||
but the :doc:`read_data <read_data>` command is missing the (optional)
|
||||
arguments that identify the fix and the header keyword and section
|
||||
keyword or those arguments are inconsistent with the keywords in the
|
||||
data file.
|
||||
|
||||
.. _err0002:
|
||||
|
||||
Incorrect format in ... section of data file
|
||||
--------------------------------------------
|
||||
|
||||
This error happens when LAMMPS reads the contents of a section of a
|
||||
:doc:`data file <read_data>` and the number of parameters in the line
|
||||
differs from what is expected. This most commonly happens when the
|
||||
atom style is different from what is expected for a specific data file
|
||||
since changing the atom style usually changes the format of the line.
|
||||
|
||||
This error can also happen when the number of entries indicated in the
|
||||
header of a data file (e.g. the number of atoms) is larger than the
|
||||
number of lines provided (e.g. in the corresponding Atoms section)
|
||||
and then LAMMPS will continue reading into the next section and that
|
||||
would have a completely different format.
|
||||
|
||||
@ -65,7 +65,6 @@ Examples
|
||||
fix 1 all ave/correlate 1 50 10000 &
|
||||
c_thermo_press[1] c_thermo_press[2] c_thermo_press[3] &
|
||||
type upper ave running title1 "My correlation data"
|
||||
|
||||
fix 1 all ave/correlate 1 50 10000 c_thermo_press[*]
|
||||
|
||||
Description
|
||||
|
||||
@ -20,11 +20,11 @@ Syntax
|
||||
.. parsed-literal::
|
||||
|
||||
c_ID = global scalar calculated by a compute with ID
|
||||
c_ID[I] = Ith component of global vector calculated by a compute with ID
|
||||
c_ID[I] = Ith component of global vector calculated by a compute with ID, I can include wildcard (see below)
|
||||
f_ID = global scalar calculated by a fix with ID
|
||||
f_ID[I] = Ith component of global vector calculated by a fix with ID
|
||||
f_ID[I] = Ith component of global vector calculated by a fix with ID, I can include wildcard (see below)
|
||||
v_name = global value calculated by an equal-style variable with name
|
||||
v_name[I] = Ith component of global vector calculated by a vector-style variable with name
|
||||
v_name[I] = Ith component of a vector-style variable with name, I can include wildcard (see below)
|
||||
|
||||
* zero or more keyword/arg pairs may be appended
|
||||
* keyword = *type* or *start* or *file* or *overwrite* or *title1* or *title2* or *ncorr* or *nlen* or *ncount*
|
||||
@ -63,6 +63,7 @@ Examples
|
||||
fix 1 all ave/correlate/long 1 10000 &
|
||||
c_thermo_press[1] c_thermo_press[2] c_thermo_press[3] &
|
||||
type upper title1 "My correlation data" nlen 15 ncount 3
|
||||
fix 1 all ave/correlate/long 1 10000 c_thermo_press[*]
|
||||
|
||||
Description
|
||||
"""""""""""
|
||||
@ -80,8 +81,10 @@ specified values may represent calculations performed by computes and
|
||||
fixes which store their own "group" definitions.
|
||||
|
||||
Each listed value can be the result of a compute or fix or the
|
||||
evaluation of an equal-style variable. See the
|
||||
:doc:`fix ave/correlate <fix_ave_correlate>` page for details.
|
||||
evaluation of an equal-style or vector-style variable. For
|
||||
vector-style variables, the specified indices can include a wildcard
|
||||
character. See the :doc:`fix ave/correlate <fix_ave_correlate>` page
|
||||
for details.
|
||||
|
||||
The *Nevery* and *Nfreq* arguments specify on what time steps the input
|
||||
values will be used to calculate correlation data and the frequency
|
||||
|
||||
@ -136,23 +136,23 @@ transfer between the subsystems:
|
||||
\bigtriangledown (\kappa_e \bigtriangledown T_e) -
|
||||
g_p (T_e - T_a) + g_s T_a'
|
||||
|
||||
where C_e is the specific heat, rho_e is the density, kappa_e is the
|
||||
thermal conductivity, T is temperature, the "e" and "a" subscripts
|
||||
represent electronic and atomic subsystems respectively, g_p is the
|
||||
coupling constant for the electron-ion interaction, and g_s is the
|
||||
electron stopping coupling parameter. C_e, rho_e, and kappa_e are
|
||||
specified as parameters to the fix. The other quantities are derived.
|
||||
The form of the heat diffusion equation used here is almost the same
|
||||
as that in equation 6 of :ref:`(Duffy) <Duffy>`, with the exception that the
|
||||
electronic density is explicitly represented, rather than being part
|
||||
of the specific heat parameter.
|
||||
where :math:`C_e` is the specific heat, :math:`\rho_e` is the density,
|
||||
:math:`\kappa_e` is the thermal conductivity, *T* is temperature, the
|
||||
"e" and "a" subscripts represent electronic and atomic subsystems
|
||||
respectively, :math:`g_p` is the coupling constant for the electron-ion
|
||||
interaction, and :math:`g_s` is the electron stopping coupling
|
||||
parameter. :math:`C_e`, :math:`\rho_e`, and :math:`\kappa_e` are
|
||||
specified as parameters to the fix *ttm* or *ttm/grid*. The other
|
||||
quantities are derived. The form of the heat diffusion equation used
|
||||
here is almost the same as that in equation 6 of :ref:`(Duffy) <Duffy>`,
|
||||
with the exception that the electronic density is explicitly
|
||||
represented, rather than being part of the specific heat parameter.
|
||||
|
||||
Currently, the TTM fixes assume that none of the user-supplied
|
||||
parameters will vary with temperature. Note that :ref:`(Duffy)
|
||||
<Duffy>` used a tanh() functional form for the temperature dependence
|
||||
of the electronic specific heat, but ignored temperature dependencies
|
||||
of any of the other parameters. See more discussion below for fix
|
||||
ttm/mod.
|
||||
parameters will vary with temperature. Note that :ref:`(Duffy) <Duffy>`
|
||||
used a tanh() functional form for the temperature dependence of the
|
||||
electronic specific heat, but ignored temperature dependencies of any of
|
||||
the other parameters. See more discussion below for fix *ttm/mod*.
|
||||
|
||||
.. note::
|
||||
|
||||
@ -265,27 +265,27 @@ heat sources (e.g. laser heating in ablation simulations):
|
||||
\bigtriangledown (\kappa_e \bigtriangledown T_e) -
|
||||
g_p (T_e - T_a) + g_s T_a' + \theta (x-x_{surface})I_0 \exp(-x/l_{skin})
|
||||
|
||||
where theta is the Heaviside step function, I_0 is the (absorbed)
|
||||
laser pulse intensity for ablation simulations, l_skin is the depth
|
||||
of skin-layer, and all other designations have the same meaning as in
|
||||
the former equation. The duration of the pulse is set by the parameter
|
||||
*tau* in the *init_file*.
|
||||
where :math:`\theta` is the Heaviside step function, :math:`I_0` is the
|
||||
(absorbed) laser pulse intensity for ablation simulations,
|
||||
:math:`l_{skin}` is the depth of the skin-layer, and all other
|
||||
designations have the same meaning as in the former equation. The
|
||||
duration of the pulse is set by the parameter *tau* in the *init_file*.
|
||||
|
||||
Fix ttm/mod also allows users to specify the dependencies of C_e and
|
||||
kappa_e on the electronic temperature. The specific heat is expressed
|
||||
as
|
||||
Fix *ttm/mod* also allows users to specify the dependencies of
|
||||
:math:`C_e` and :math:`\kappa_e` on the electronic temperature. The
|
||||
specific heat is expressed as
|
||||
|
||||
.. math::
|
||||
|
||||
C_e = C_0 + (a_0 + a_1 X + a_2 X^2 + a_3 X^3 + a_4 X^4) \exp (-(AX)^2)
|
||||
|
||||
where *X* = T_e/1000, and the thermal conductivity is defined as
|
||||
kappa_e = D_e\*rho_e\*C_e, where D_e is the thermal diffusion
|
||||
coefficient.
|
||||
where :math:`X = \frac{T_e}{1000}`, and the thermal conductivity is
|
||||
defined as :math:`\kappa_e = D_e \cdot \rho_e \cdot C_e`, where
|
||||
:math:`D_e` is the thermal diffusion coefficient.
|
||||
|
||||
Electronic pressure effects are included in the TTM model to account
|
||||
for the blast force acting on ions because of electronic pressure
|
||||
gradient (see :ref:`(Chen) <Chen>`, :ref:`(Norman) <Norman>`). The total force
|
||||
Electronic pressure effects are included in the TTM model to account for
|
||||
the blast force acting on ions because of electronic pressure gradient
|
||||
(see :ref:`(Chen) <Chen>`, :ref:`(Norman) <Norman>`). The total force
|
||||
acting on an ion is:
|
||||
|
||||
.. math::
|
||||
@ -293,13 +293,14 @@ acting on an ion is:
|
||||
{\vec F}_i = - \partial U / \partial {\vec r}_i + {\vec
|
||||
F}_{langevin} - \nabla P_e/n_{ion}
|
||||
|
||||
where F_langevin is a force from Langevin thermostat simulating
|
||||
electron-phonon coupling, and nabla P_e/n_ion is the electron blast
|
||||
force.
|
||||
where :math:`F_{langevin}` is a force from Langevin thermostat
|
||||
simulating electron-phonon coupling, and :math:`\nabla P_e/n_{ion}` is
|
||||
the electron blast force.
|
||||
|
||||
The electronic pressure is taken to be P_e = B\*rho_e\*C_e\*T_e
|
||||
The electronic pressure is taken to be :math:`P_e = B \cdot \rho_e \cdot
|
||||
C_e \cdot T_e`
|
||||
|
||||
The current fix ttm/mod implementation allows TTM simulations with a
|
||||
The current fix *ttm/mod* implementation allows TTM simulations with a
|
||||
vacuum. The vacuum region is defined as the grid cells with zero
|
||||
electronic temperature. The numerical scheme does not allow energy
|
||||
exchange with such cells. Since the material can expand to previously
|
||||
@ -319,10 +320,10 @@ electronic pressure gradient is calculated as
|
||||
\frac{x}{x+\lambda}\frac{(C_e{}T_e)_{x+\Delta
|
||||
x}-(C_e{}T_e)_{x}}{\Delta x} \right]
|
||||
|
||||
where lambda is the electron mean free path (see :ref:`(Norman) <Norman>`,
|
||||
:ref:`(Pisarev) <Pisarev>`)
|
||||
where :math:`\lambda` is the electron mean free path (see :ref:`(Norman)
|
||||
<Norman>`, :ref:`(Pisarev) <Pisarev>`)
|
||||
|
||||
The fix ttm/mod parameter file *init_file* has the following syntax.
|
||||
The fix *ttm/mod* parameter file *init_file* has the following syntax.
|
||||
Every line with an odd number is considered as a comment and
|
||||
ignored. The lines with the even numbers are treated as follows:
|
||||
|
||||
|
||||
137
doc/src/pair_pedone.rst
Normal file
137
doc/src/pair_pedone.rst
Normal file
@ -0,0 +1,137 @@
|
||||
.. index:: pair_style pedone
|
||||
.. index:: pair_style pedone/omp
|
||||
|
||||
pair_style pedone command
|
||||
=========================
|
||||
|
||||
Accelerator Variants: *pedone/omp*
|
||||
|
||||
|
||||
Syntax
|
||||
""""""
|
||||
|
||||
.. code-block:: LAMMPS
|
||||
|
||||
pair_style style args
|
||||
|
||||
* style = *pedone*
|
||||
* args = list of arguments for a particular style
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
*pedone* args = cutoff
|
||||
cutoff = global cutoff for Pedone interactions (distance units)
|
||||
|
||||
Examples
|
||||
""""""""
|
||||
|
||||
.. code-block:: LAMMPS
|
||||
|
||||
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
|
||||
kspace_style pppm 1.0e-5
|
||||
|
||||
pair_coeff * * coul/long
|
||||
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
|
||||
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
|
||||
|
||||
Used in input scripts:
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
examples/PACKAGES/pedone/in.pedone.relax
|
||||
examples/PACKAGES/pedone/in.pedone.melt
|
||||
|
||||
|
||||
|
||||
Description
|
||||
"""""""""""
|
||||
|
||||
.. versionadded:: TBD
|
||||
|
||||
Pair style *pedone* computes the **non-Coulomb** interactions of the Pedone
|
||||
(or PMMCS) potential :ref:`(Pedone) <Pedone>` which combines Coulomb
|
||||
interactions, Morse potential, and repulsive :math:`r^{-12}`
|
||||
Lennard-Jones terms (see below). The *pedone* pair style is meant
|
||||
to be used in addition to a :doc:`Coulomb pair style <pair_coul>` via
|
||||
pair style :doc:`hybrid/overlay <pair_hybrid>` (see example above).
|
||||
Using *coul/long* or *coul/dsf* (for solids) is recommended.
|
||||
|
||||
The full Pedone potential function from :ref:`(Pedone) <Pedone>` for each
|
||||
pair of atoms is:
|
||||
|
||||
.. math::
|
||||
|
||||
E = \frac{C q_i q_j}{\epsilon r}
|
||||
+ D_0 \left[ e^{- 2 \alpha (r - r_0)} - 2 e^{- \alpha (r - r_0)} \right]
|
||||
+ \frac{B_0}{r^{12}} \qquad r < r_c
|
||||
|
||||
:math:`r_c` is the cutoff and :math:`C` is a conversion factor that is
|
||||
specific to the choice of :doc:`units <units>` so that the entire
|
||||
Coulomb term is in energy units with :math:`q_i` and :math:`q_j` as the
|
||||
assigned charges in multiples of the elementary charge.
|
||||
|
||||
The following coefficients must be defined for the selected pairs of
|
||||
atom types via the :doc:`pair_coeff <pair_coeff>` command as in the
|
||||
example above:
|
||||
|
||||
* :math:`D_0` (energy units)
|
||||
* :math:`\alpha` (1/distance units)
|
||||
* :math:`r_0` (distance units)
|
||||
* :math:`B_0` (energy-distance^12 units)
|
||||
* cutoff (distance units)
|
||||
|
||||
The last coefficient is optional. If not specified, the global *pedone*
|
||||
cutoff is used.
|
||||
|
||||
----------
|
||||
|
||||
.. include:: accel_styles.rst
|
||||
|
||||
----------
|
||||
|
||||
Mixing, shift, table, tail correction, restart, rRESPA info
|
||||
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
||||
|
||||
This pair style does not support mixing.
|
||||
|
||||
This pair style supports the :doc:`pair_modify <pair_modify>` shift
|
||||
option for the energy of the pair interaction.
|
||||
|
||||
This pair style does not support the :doc:`pair_modify <pair_modify>`
|
||||
tail option for adding long-range tail corrections to energy and
|
||||
pressure.
|
||||
|
||||
This pair style writes its information to :doc:`binary restart files <restart>`,
|
||||
so pair_style and pair_coeff commands do not need to be specified in an input
|
||||
script that reads a restart file.
|
||||
|
||||
This pair style can only be used via the *pair* keyword of the
|
||||
:doc:`run_style respa <run_style>` command. It does not support the
|
||||
*inner*, *middle*, or *outer* keywords.
|
||||
|
||||
----------
|
||||
|
||||
Restrictions
|
||||
""""""""""""
|
||||
|
||||
The *pedone* pair style is only enabled if LAMMPS was built with the
|
||||
EXTRA-PAIR package. See the :doc:`Build package <Build_package>` page
|
||||
for more info.
|
||||
|
||||
Related commands
|
||||
""""""""""""""""
|
||||
|
||||
:doc:`pair_coeff <pair_coeff>`, :doc:`pair_style <pair_style>`,
|
||||
:doc:`pair style coul/long and coul/dsf <pair_coul>`,
|
||||
:doc:`pair style morse <pair_morse>`
|
||||
|
||||
Default
|
||||
"""""""
|
||||
|
||||
none
|
||||
|
||||
-------------
|
||||
|
||||
.. _Pedone:
|
||||
|
||||
**(Pedone)** A. Pedone, G. Malavasi, M. C. Menziani, A. N. Cormack, and U. Segre, J. Phys. Chem. B, 110, 11780 (2006)
|
||||
@ -275,30 +275,30 @@ accelerated styles exist.
|
||||
* :doc:`lj/smooth/linear <pair_lj_smooth_linear>` - linear smoothed LJ potential
|
||||
* :doc:`lj/switch3/coulgauss/long <pair_lj_switch3_coulgauss_long>` - smoothed LJ vdW potential with Gaussian electrostatics
|
||||
* :doc:`lj96/cut <pair_lj96>` - Lennard-Jones 9/6 potential
|
||||
* :doc:`local/density <pair_local_density>` - generalized basic local density potential
|
||||
* :doc:`lubricate <pair_lubricate>` - hydrodynamic lubrication forces
|
||||
* :doc:`lubricate/poly <pair_lubricate>` - hydrodynamic lubrication forces with polydispersity
|
||||
* :doc:`lubricateU <pair_lubricateU>` - hydrodynamic lubrication forces for Fast Lubrication Dynamics
|
||||
* :doc:`lubricateU/poly <pair_lubricateU>` - hydrodynamic lubrication forces for Fast Lubrication with polydispersity
|
||||
* :doc:`local/density <pair_local_density>` - Generalized basic local density potential
|
||||
* :doc:`lubricate <pair_lubricate>` - Hydrodynamic lubrication forces
|
||||
* :doc:`lubricate/poly <pair_lubricate>` - Hydrodynamic lubrication forces with polydispersity
|
||||
* :doc:`lubricateU <pair_lubricateU>` - Hydrodynamic lubrication forces for Fast Lubrication Dynamics
|
||||
* :doc:`lubricateU/poly <pair_lubricateU>` - Hydrodynamic lubrication forces for Fast Lubrication with polydispersity
|
||||
* :doc:`mdpd <pair_mesodpd>` - mDPD particle interactions
|
||||
* :doc:`mdpd/rhosum <pair_mesodpd>` - mDPD particle interactions for mass density
|
||||
* :doc:`meam <pair_meam>` - modified embedded atom method (MEAM)
|
||||
* :doc:`meam/ms <pair_meam>` - multi-state modified embedded atom method (MS-MEAM)
|
||||
* :doc:`meam/spline <pair_meam_spline>` - splined version of MEAM
|
||||
* :doc:`meam/sw/spline <pair_meam_sw_spline>` - splined version of MEAM with a Stillinger-Weber term
|
||||
* :doc:`mesocnt <pair_mesocnt>` - mesoscopic vdW potential for (carbon) nanotubes
|
||||
* :doc:`mesocnt/viscous <pair_mesocnt>` - mesoscopic vdW potential for (carbon) nanotubes with friction
|
||||
* :doc:`mgpt <pair_mgpt>` - simplified model generalized pseudopotential theory (MGPT) potential
|
||||
* :doc:`meam <pair_meam>` - Modified embedded atom method (MEAM)
|
||||
* :doc:`meam/ms <pair_meam>` - Multi-state modified embedded atom method (MS-MEAM)
|
||||
* :doc:`meam/spline <pair_meam_spline>` - Splined version of MEAM
|
||||
* :doc:`meam/sw/spline <pair_meam_sw_spline>` - Splined version of MEAM with a Stillinger-Weber term
|
||||
* :doc:`mesocnt <pair_mesocnt>` - Mesoscopic vdW potential for (carbon) nanotubes
|
||||
* :doc:`mesocnt/viscous <pair_mesocnt>` - Mesoscopic vdW potential for (carbon) nanotubes with friction
|
||||
* :doc:`mgpt <pair_mgpt>` - Simplified model generalized pseudopotential theory (MGPT) potential
|
||||
* :doc:`mie/cut <pair_mie>` - Mie potential
|
||||
* :doc:`mm3/switch3/coulgauss/long <pair_lj_switch3_coulgauss_long>` - smoothed MM3 vdW potential with Gaussian electrostatics
|
||||
* :doc:`mm3/switch3/coulgauss/long <pair_lj_switch3_coulgauss_long>` - Smoothed MM3 vdW potential with Gaussian electrostatics
|
||||
* :doc:`momb <pair_momb>` - Many-Body Metal-Organic (MOMB) force field
|
||||
* :doc:`morse <pair_morse>` - Morse potential
|
||||
* :doc:`morse/smooth/linear <pair_morse>` - linear smoothed Morse potential
|
||||
* :doc:`morse/smooth/linear <pair_morse>` - Linear smoothed Morse potential
|
||||
* :doc:`morse/soft <pair_morse>` - Morse potential with a soft core
|
||||
* :doc:`multi/lucy <pair_multi_lucy>` - DPD potential with density-dependent force
|
||||
* :doc:`multi/lucy/rx <pair_multi_lucy_rx>` - reactive DPD potential with density-dependent force
|
||||
* :doc:`nb3b/harmonic <pair_nb3b>` - non-bonded 3-body harmonic potential
|
||||
* :doc:`nb3b/screened <pair_nb3b>` - non-bonded 3-body screened harmonic potential
|
||||
* :doc:`nb3b/harmonic <pair_nb3b>` - Non-bonded 3-body harmonic potential
|
||||
* :doc:`nb3b/screened <pair_nb3b>` - Non-bonded 3-body screened harmonic potential
|
||||
* :doc:`nm/cut <pair_nm>` - N-M potential
|
||||
* :doc:`nm/cut/coul/cut <pair_nm>` - N-M potential with cutoff Coulomb
|
||||
* :doc:`nm/cut/coul/long <pair_nm>` - N-M potential with long-range Coulomb
|
||||
@ -322,21 +322,22 @@ accelerated styles exist.
|
||||
* :doc:`oxrna2/xstk <pair_oxrna2>` -
|
||||
* :doc:`pace <pair_pace>` - Atomic Cluster Expansion (ACE) machine-learning potential
|
||||
* :doc:`pace/extrapolation <pair_pace>` - Atomic Cluster Expansion (ACE) machine-learning potential with extrapolation grades
|
||||
* :doc:`pedone <pair_pedone>` - Pedone (PMMCS) potential (non-Coulomb part)
|
||||
* :doc:`pod <pair_pod>` - Proper orthogonal decomposition (POD) machine-learning potential
|
||||
* :doc:`peri/eps <pair_peri>` - peridynamic EPS potential
|
||||
* :doc:`peri/lps <pair_peri>` - peridynamic LPS potential
|
||||
* :doc:`peri/pmb <pair_peri>` - peridynamic PMB potential
|
||||
* :doc:`peri/ves <pair_peri>` - peridynamic VES potential
|
||||
* :doc:`polymorphic <pair_polymorphic>` - polymorphic 3-body potential
|
||||
* :doc:`peri/eps <pair_peri>` - Peridynamic EPS potential
|
||||
* :doc:`peri/lps <pair_peri>` - Peridynamic LPS potential
|
||||
* :doc:`peri/pmb <pair_peri>` - Peridynamic PMB potential
|
||||
* :doc:`peri/ves <pair_peri>` - Peridynamic VES potential
|
||||
* :doc:`polymorphic <pair_polymorphic>` - Polymorphic 3-body potential
|
||||
* :doc:`python <pair_python>` -
|
||||
* :doc:`quip <pair_quip>` -
|
||||
* :doc:`rann <pair_rann>` -
|
||||
* :doc:`reaxff <pair_reaxff>` - ReaxFF potential
|
||||
* :doc:`rebo <pair_airebo>` - second generation REBO potential of Brenner
|
||||
* :doc:`rebo <pair_airebo>` - Second generation REBO potential of Brenner
|
||||
* :doc:`rebomos <pair_rebomos>` - REBOMoS potential for MoS2
|
||||
* :doc:`resquared <pair_resquared>` - Everaers RE-Squared ellipsoidal potential
|
||||
* :doc:`saip/metal <pair_saip_metal>` - interlayer potential for hetero-junctions formed with hexagonal 2D materials and metal surfaces
|
||||
* :doc:`sdpd/taitwater/isothermal <pair_sdpd_taitwater_isothermal>` - smoothed dissipative particle dynamics for water at isothermal conditions
|
||||
* :doc:`saip/metal <pair_saip_metal>` - Interlayer potential for hetero-junctions formed with hexagonal 2D materials and metal surfaces
|
||||
* :doc:`sdpd/taitwater/isothermal <pair_sdpd_taitwater_isothermal>` - Smoothed dissipative particle dynamics for water at isothermal conditions
|
||||
* :doc:`smatb <pair_smatb>` - Second Moment Approximation to the Tight Binding
|
||||
* :doc:`smatb/single <pair_smatb>` - Second Moment Approximation to the Tight Binding for single-element systems
|
||||
* :doc:`smd/hertz <pair_smd_hertz>` -
|
||||
|
||||
@ -279,9 +279,9 @@ This means the variable can then be evaluated as many times as desired
|
||||
and will return those values. There are two ways to cause the next
|
||||
set of per-atom values from the file to be read: use the
|
||||
:doc:`next <next>` command or the next() function in an atom-style
|
||||
variable, as discussed below. Unlike most variable styles
|
||||
atomfile-style variables are **deleted** during a :doc:`clear <clear>`
|
||||
command.
|
||||
variable, as discussed below. Unlike most variable styles, which
|
||||
remain defined, atomfile-style variables are **deleted** during a
|
||||
:doc:`clear <clear>` command.
|
||||
|
||||
The rules for formatting the file are as follows. Each time a set of
|
||||
per-atom values is read, a non-blank line is searched for in the file.
|
||||
@ -289,23 +289,37 @@ The file is read line by line but only up to 254 characters are used.
|
||||
The rest are ignored. A comment character "#" can be used anywhere
|
||||
on a line and all text following and the "#" character are ignored;
|
||||
text starting with the comment character is stripped. Blank lines
|
||||
are skipped. The first "word" of a non-blank line, delimited by
|
||||
white-space, is read as the count N of per-atom lines to immediately
|
||||
follow. N can be the total number of atoms in the system, or only a
|
||||
subset. The next N lines have the following format
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
ID value
|
||||
|
||||
where ID is an atom ID and value is the per-atom numeric value that
|
||||
will be assigned to that atom. IDs can be listed in any order.
|
||||
are skipped. The first non-blank line is expected to contain a single
|
||||
integer number as the count *N* of per-atom lines to follow. *N* can
|
||||
be the total number of atoms in the system or less, indicating that data
|
||||
for a subset is read. The next N lines must consist of two numbers,
|
||||
the atom-ID of the atom for which a value is set followed by a floating
|
||||
point number with the value. The atom-IDs may be listed in any order.
|
||||
|
||||
.. note::
|
||||
|
||||
Every time a set of per-atom lines is read, the value for all
|
||||
atoms is first set to 0.0. Thus values for atoms whose ID does not
|
||||
appear in the set, will remain 0.0.
|
||||
Every time a set of per-atom lines is read, the value of the atomfile
|
||||
variable for **all** atoms is first initialized to 0.0. Thus values
|
||||
for atoms whose ID does not appear in the set in the file will remain
|
||||
at 0.0.
|
||||
|
||||
Below is a small example for the atomfile variable file format:
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
# first set
|
||||
4
|
||||
# atom-ID value
|
||||
3 1
|
||||
4 -4
|
||||
1 0.5
|
||||
2 -0.5
|
||||
|
||||
# second set
|
||||
2
|
||||
|
||||
2 1.0
|
||||
4 -1.0
|
||||
|
||||
----------
|
||||
|
||||
@ -1174,12 +1188,17 @@ custom atom properties are the same; just replace the leading "i" with
|
||||
|
||||
+--------+---------------+------------------------------------------+
|
||||
| equal | i_name[I] | element of per-atom vector (I = atom ID) |
|
||||
+--------+---------------+------------------------------------------+
|
||||
| equal | i2_name[I][J] | element of per-atom array (I = atom ID) |
|
||||
+--------+---------------+------------------------------------------+
|
||||
+--------+---------------+------------------------------------------+
|
||||
| vector | i_name[I] | element of per-atom vector (I = atom ID) |
|
||||
+--------+---------------+------------------------------------------+
|
||||
| vector | i2_name[I][J] | element of per-atom array (I = atom ID) |
|
||||
+--------+---------------+------------------------------------------+
|
||||
+--------+---------------+------------------------------------------+
|
||||
| atom | i_name | per-atom vector |
|
||||
+--------+---------------+------------------------------------------+
|
||||
| atom | i2_name[I] | column of per-atom array |
|
||||
+--------+---------------+------------------------------------------+
|
||||
|
||||
@ -1222,15 +1241,23 @@ table:
|
||||
|
||||
+--------+------------+------------------------------------------+
|
||||
| equal | c_ID | global scalar |
|
||||
+--------+------------+------------------------------------------+
|
||||
| equal | c_ID[I] | element of global vector |
|
||||
+--------+------------+------------------------------------------+
|
||||
| equal | c_ID[I][J] | element of global array |
|
||||
+--------+------------+------------------------------------------+
|
||||
| equal | C_ID[I] | element of per-atom vector (I = atom ID) |
|
||||
+--------+------------+------------------------------------------+
|
||||
| equal | C_ID[I][J] | element of per-atom array (I = atom ID) |
|
||||
+--------+------------+------------------------------------------+
|
||||
+--------+------------+------------------------------------------+
|
||||
| vector | c_ID | global vector |
|
||||
+--------+------------+------------------------------------------+
|
||||
| vector | c_ID[I] | column of global array |
|
||||
+--------+------------+------------------------------------------+
|
||||
+--------+------------+------------------------------------------+
|
||||
| atom | c_ID | per-atom vector |
|
||||
+--------+------------+------------------------------------------+
|
||||
| atom | c_ID[I] | column of per-atom array |
|
||||
+--------+------------+------------------------------------------+
|
||||
|
||||
@ -1286,15 +1313,23 @@ and atom-style variables are listed in the following table:
|
||||
|
||||
+--------+------------+------------------------------------------+
|
||||
| equal | f_ID | global scalar |
|
||||
+--------+------------+------------------------------------------+
|
||||
| equal | f_ID[I] | element of global vector |
|
||||
+--------+------------+------------------------------------------+
|
||||
| equal | f_ID[I][J] | element of global array |
|
||||
+--------+------------+------------------------------------------+
|
||||
| equal | F_ID[I] | element of per-atom vector (I = atom ID) |
|
||||
+--------+------------+------------------------------------------+
|
||||
| equal | F_ID[I][J] | element of per-atom array (I = atom ID) |
|
||||
+--------+------------+------------------------------------------+
|
||||
+--------+------------+------------------------------------------+
|
||||
| vector | f_ID | global vector |
|
||||
+--------+------------+------------------------------------------+
|
||||
| vector | f_ID[I] | column of global array |
|
||||
+--------+------------+------------------------------------------+
|
||||
+--------+------------+------------------------------------------+
|
||||
| atom | f_ID | per-atom vector |
|
||||
+--------+------------+------------------------------------------+
|
||||
| atom | f_ID[I] | column of per-atom array |
|
||||
+--------+------------+------------------------------------------+
|
||||
|
||||
@ -1365,17 +1400,27 @@ per-atom vector.
|
||||
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
| equal | v_name | global scalar from an equal-style variable |
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
| equal | v_name[I] | element of global vector from a vector-style variable |
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
| equal | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
| vector | v_name | global scalar from an equal-style variable |
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
| vector | v_name | global vector from a vector-style variable |
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
| vector | v_name[I] | element of global vector from a vector-style variable |
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
| vector | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
| atom | v_name | global scalar from an equal-style variable |
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
| atom | v_name | per-atom vector from an atom-style or atomfile-style variable |
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
| atom | v_name[I] | element of global vector from a vector-style variable |
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
| atom | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
|
||||
+--------+-----------+-----------------------------------------------------------------------------------+
|
||||
|
||||
|
||||
@ -2043,6 +2043,7 @@ Makefiles
|
||||
makelist
|
||||
makepkg
|
||||
Makse
|
||||
Malavasi
|
||||
malloc
|
||||
Malolepsza
|
||||
Manby
|
||||
@ -2152,6 +2153,7 @@ membered
|
||||
memcheck
|
||||
Mendelev
|
||||
Menon
|
||||
Menziani
|
||||
mer
|
||||
Meremianin
|
||||
Mersenne
|
||||
@ -2775,6 +2777,8 @@ Peachey
|
||||
peachpuff
|
||||
Pearlman
|
||||
Pedersen
|
||||
pedone
|
||||
Pedone
|
||||
peID
|
||||
PEigenDense
|
||||
Peng
|
||||
|
||||
38
examples/PACKAGES/pedone/in.pedone.melt
Normal file
38
examples/PACKAGES/pedone/in.pedone.melt
Normal file
@ -0,0 +1,38 @@
|
||||
# Ca-O melt with Pedone potential
|
||||
|
||||
units metal
|
||||
atom_style charge
|
||||
|
||||
lattice fcc 4.8105 # experimental lattice parameter for fcc-lattice Ca cations
|
||||
|
||||
region box block 0 4 0 4 0 4
|
||||
create_box 2 box
|
||||
create_atoms 1 box
|
||||
|
||||
lattice fcc 4.8105 origin 0.5 0.5 0.5 # O anion lattice shifted by half a unit cell
|
||||
create_atoms 2 box
|
||||
|
||||
mass 1 40.078
|
||||
mass 2 15.999
|
||||
|
||||
set type 1 charge 1.2
|
||||
set type 2 charge -1.2
|
||||
|
||||
timestep 0.002
|
||||
neigh_modify delay 5 every 1 check yes
|
||||
|
||||
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
|
||||
kspace_style pppm 1.0e-6
|
||||
|
||||
pair_coeff * * coul/long
|
||||
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
|
||||
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
|
||||
|
||||
velocity all create 6000.0 98347
|
||||
|
||||
fix 1 all nvt temp 3000.0 3000.0 0.1
|
||||
|
||||
# dump 1 all atom 500 Ca-O-melt.lammpstrj
|
||||
|
||||
thermo 100
|
||||
run 1000
|
||||
38
examples/PACKAGES/pedone/in.pedone.relax
Normal file
38
examples/PACKAGES/pedone/in.pedone.relax
Normal file
@ -0,0 +1,38 @@
|
||||
# Ca-O crystal with Pedone potential
|
||||
|
||||
units metal
|
||||
atom_style charge
|
||||
|
||||
lattice fcc 4.8105 # experimental lattice parameter for fcc-lattice Ca cations
|
||||
|
||||
region box block 0 4 0 4 0 4
|
||||
create_box 2 box
|
||||
create_atoms 1 box
|
||||
|
||||
lattice fcc 4.8105 origin 0.5 0.5 0.5 # O anion lattice shifted by half a unit cell
|
||||
create_atoms 2 box
|
||||
|
||||
mass 1 40.078
|
||||
mass 2 15.999
|
||||
|
||||
displace_atoms all random 0.01 0.01 0.01 9084544
|
||||
set type 1 charge 1.2
|
||||
set type 2 charge -1.2
|
||||
|
||||
timestep 0.002
|
||||
neigh_modify delay 5 every 1 check yes
|
||||
|
||||
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
|
||||
kspace_style pppm 1.0e-6
|
||||
|
||||
pair_coeff * * coul/long
|
||||
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
|
||||
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
|
||||
|
||||
variable len equal lx*0.25
|
||||
thermo_style custom step v_len lx pe press
|
||||
thermo 100
|
||||
fix 1 all box/relax iso 0.0
|
||||
minimize 0.0 0.0 1000 10000
|
||||
|
||||
print "Expected lattice parameter: 4.7748, computed: $(v_len:%6.4f)"
|
||||
122
examples/PACKAGES/pedone/log.9Apr24.pedone.melt.g++.1
Normal file
122
examples/PACKAGES/pedone/log.9Apr24.pedone.melt.g++.1
Normal file
@ -0,0 +1,122 @@
|
||||
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-373-g7ac84e18dd)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Ca-O melt with Pedone potential
|
||||
|
||||
units metal
|
||||
atom_style charge
|
||||
|
||||
lattice fcc 4.8105 # experimental lattice parameter for fcc-lattice Ca cations
|
||||
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
|
||||
|
||||
region box block 0 4 0 4 0 4
|
||||
create_box 2 box
|
||||
Created orthogonal box = (0 0 0) to (19.242 19.242 19.242)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 256 atoms
|
||||
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
|
||||
create_atoms CPU = 0.000 seconds
|
||||
|
||||
lattice fcc 4.8105 origin 0.5 0.5 0.5 # O anion lattice shifted by half a unit cell
|
||||
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
|
||||
create_atoms 2 box
|
||||
Created 256 atoms
|
||||
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
|
||||
create_atoms CPU = 0.000 seconds
|
||||
|
||||
mass 1 40.078
|
||||
mass 2 15.999
|
||||
|
||||
set type 1 charge 1.2
|
||||
Setting atom values ...
|
||||
256 settings made for charge
|
||||
set type 2 charge -1.2
|
||||
Setting atom values ...
|
||||
256 settings made for charge
|
||||
|
||||
timestep 0.002
|
||||
neigh_modify delay 5 every 1 check yes
|
||||
|
||||
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
|
||||
kspace_style pppm 1.0e-6
|
||||
|
||||
pair_coeff * * coul/long
|
||||
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
|
||||
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
|
||||
|
||||
velocity all create 6000.0 98347
|
||||
|
||||
fix 1 all nvt temp 3000.0 3000.0 0.1
|
||||
|
||||
# dump 1 all atom 500 Ca-O-melt.lammpstrj
|
||||
|
||||
thermo 100
|
||||
run 1000
|
||||
PPPM initialization ...
|
||||
using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
|
||||
G vector (1/distance) = 0.23676226
|
||||
grid = 24 24 24
|
||||
stencil order = 5
|
||||
estimated absolute RMS force accuracy = 1.3089053e-05
|
||||
estimated relative force accuracy = 9.089844e-07
|
||||
using double precision FFTW3
|
||||
3d grid and FFT values/proc = 29791 13824
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 5 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 17
|
||||
ghost atom cutoff = 17
|
||||
binsize = 8.5, bins = 3 3 3
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair pedone, perpetual, skip from (2)
|
||||
attributes: half, newton on
|
||||
pair build: skip
|
||||
stencil: none
|
||||
bin: none
|
||||
(2) pair coul/long, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton
|
||||
stencil: half/bin/3d
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 9.239 | 9.239 | 9.239 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 6000 -3771.5568 0 -3375.2452 34213.185
|
||||
100 2894.1756 -3562.491 0 -3371.3251 114640.32
|
||||
200 2980.3531 -3570.2657 0 -3373.4076 123673.56
|
||||
300 2783.0437 -3574.5809 0 -3390.7554 119791.27
|
||||
400 3021.6581 -3568.2149 0 -3368.6285 116032.29
|
||||
500 3112.0438 -3580.0178 0 -3374.4613 114798.18
|
||||
600 2973.4609 -3577.0582 0 -3380.6553 111843.46
|
||||
700 3180.1687 -3568.4542 0 -3358.3979 121008.83
|
||||
800 2923.7803 -3573.3023 0 -3380.181 111459.55
|
||||
900 2940.3133 -3572.1322 0 -3377.9188 118177.36
|
||||
1000 3070.2584 -3575.5655 0 -3372.769 114175.52
|
||||
Loop time of 13.683 on 1 procs for 1000 steps with 512 atoms
|
||||
|
||||
Performance: 12.629 ns/day, 1.900 hours/ns, 73.084 timesteps/s, 37.419 katom-step/s
|
||||
99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 11.545 | 11.545 | 11.545 | 0.0 | 84.37
|
||||
Kspace | 1.4121 | 1.4121 | 1.4121 | 0.0 | 10.32
|
||||
Neigh | 0.65265 | 0.65265 | 0.65265 | 0.0 | 4.77
|
||||
Comm | 0.056036 | 0.056036 | 0.056036 | 0.0 | 0.41
|
||||
Output | 0.00022945 | 0.00022945 | 0.00022945 | 0.0 | 0.00
|
||||
Modify | 0.0090252 | 0.0090252 | 0.0090252 | 0.0 | 0.07
|
||||
Other | | 0.00801 | | | 0.06
|
||||
|
||||
Nlocal: 512 ave 512 max 512 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 10901 ave 10901 max 10901 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 374419 ave 374419 max 374419 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 374419
|
||||
Ave neighs/atom = 731.28711
|
||||
Neighbor list builds = 71
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:13
|
||||
122
examples/PACKAGES/pedone/log.9Apr24.pedone.melt.g++.4
Normal file
122
examples/PACKAGES/pedone/log.9Apr24.pedone.melt.g++.4
Normal file
@ -0,0 +1,122 @@
|
||||
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-373-g7ac84e18dd)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Ca-O melt with Pedone potential
|
||||
|
||||
units metal
|
||||
atom_style charge
|
||||
|
||||
lattice fcc 4.8105 # experimental lattice parameter for fcc-lattice Ca cations
|
||||
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
|
||||
|
||||
region box block 0 4 0 4 0 4
|
||||
create_box 2 box
|
||||
Created orthogonal box = (0 0 0) to (19.242 19.242 19.242)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 256 atoms
|
||||
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
|
||||
create_atoms CPU = 0.000 seconds
|
||||
|
||||
lattice fcc 4.8105 origin 0.5 0.5 0.5 # O anion lattice shifted by half a unit cell
|
||||
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
|
||||
create_atoms 2 box
|
||||
Created 256 atoms
|
||||
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
|
||||
create_atoms CPU = 0.000 seconds
|
||||
|
||||
mass 1 40.078
|
||||
mass 2 15.999
|
||||
|
||||
set type 1 charge 1.2
|
||||
Setting atom values ...
|
||||
256 settings made for charge
|
||||
set type 2 charge -1.2
|
||||
Setting atom values ...
|
||||
256 settings made for charge
|
||||
|
||||
timestep 0.002
|
||||
neigh_modify delay 5 every 1 check yes
|
||||
|
||||
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
|
||||
kspace_style pppm 1.0e-6
|
||||
|
||||
pair_coeff * * coul/long
|
||||
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
|
||||
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
|
||||
|
||||
velocity all create 6000.0 98347
|
||||
|
||||
fix 1 all nvt temp 3000.0 3000.0 0.1
|
||||
|
||||
# dump 1 all atom 500 Ca-O-melt.lammpstrj
|
||||
|
||||
thermo 100
|
||||
run 1000
|
||||
PPPM initialization ...
|
||||
using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
|
||||
G vector (1/distance) = 0.23676226
|
||||
grid = 24 24 24
|
||||
stencil order = 5
|
||||
estimated absolute RMS force accuracy = 1.3089053e-05
|
||||
estimated relative force accuracy = 9.089844e-07
|
||||
using double precision FFTW3
|
||||
3d grid and FFT values/proc = 11191 3456
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 5 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 17
|
||||
ghost atom cutoff = 17
|
||||
binsize = 8.5, bins = 3 3 3
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair pedone, perpetual, skip from (2)
|
||||
attributes: half, newton on
|
||||
pair build: skip
|
||||
stencil: none
|
||||
bin: none
|
||||
(2) pair coul/long, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton
|
||||
stencil: half/bin/3d
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 5.315 | 5.315 | 5.315 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 6000 -3771.5568 0 -3375.2452 34213.185
|
||||
100 3050.0106 -3571.4712 0 -3370.0121 118480.04
|
||||
200 3100.0073 -3571.2534 0 -3366.492 120618.37
|
||||
300 2959.7127 -3580.0883 0 -3384.5935 109184.72
|
||||
400 2922.7083 -3563.9803 0 -3370.9298 120165.71
|
||||
500 3145.0439 -3571.3828 0 -3363.6465 115057.51
|
||||
600 2741.7439 -3563.5077 0 -3382.4102 115504.31
|
||||
700 2906.3636 -3567.3604 0 -3375.3895 119518.5
|
||||
800 2995.3864 -3567.3838 0 -3369.5327 117975.22
|
||||
900 2965.24 -3565.7983 0 -3369.9385 123362.35
|
||||
1000 2916.6485 -3578.7471 0 -3386.0968 115624.78
|
||||
Loop time of 4.50395 on 4 procs for 1000 steps with 512 atoms
|
||||
|
||||
Performance: 38.366 ns/day, 0.626 hours/ns, 222.028 timesteps/s, 113.678 katom-step/s
|
||||
99.4% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 3.2703 | 3.2983 | 3.3259 | 1.3 | 73.23
|
||||
Kspace | 0.79815 | 0.82633 | 0.85342 | 2.6 | 18.35
|
||||
Neigh | 0.18328 | 0.18398 | 0.18472 | 0.1 | 4.08
|
||||
Comm | 0.17423 | 0.17508 | 0.17592 | 0.2 | 3.89
|
||||
Output | 0.00019336 | 0.0002167 | 0.00028554 | 0.0 | 0.00
|
||||
Modify | 0.0089842 | 0.0091093 | 0.0092205 | 0.1 | 0.20
|
||||
Other | | 0.01096 | | | 0.24
|
||||
|
||||
Nlocal: 128 ave 143 max 118 min
|
||||
Histogram: 2 0 0 0 0 1 0 0 0 1
|
||||
Nghost: 7622.75 ave 7651 max 7598 min
|
||||
Histogram: 1 0 0 1 1 0 0 0 0 1
|
||||
Neighs: 93581.8 ave 106456 max 84898 min
|
||||
Histogram: 1 1 0 0 1 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 374327
|
||||
Ave neighs/atom = 731.10742
|
||||
Neighbor list builds = 71
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:04
|
||||
134
examples/PACKAGES/pedone/log.9Apr24.pedone.relax.g++.1
Normal file
134
examples/PACKAGES/pedone/log.9Apr24.pedone.relax.g++.1
Normal file
@ -0,0 +1,134 @@
|
||||
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-373-g7ac84e18dd)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Ca-O crystal with Pedone potential
|
||||
|
||||
units metal
|
||||
atom_style charge
|
||||
|
||||
lattice fcc 4.8105 # experimental lattice parameter for fcc-lattice Ca cations
|
||||
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
|
||||
|
||||
region box block 0 4 0 4 0 4
|
||||
create_box 2 box
|
||||
Created orthogonal box = (0 0 0) to (19.242 19.242 19.242)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 256 atoms
|
||||
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
|
||||
create_atoms CPU = 0.000 seconds
|
||||
|
||||
lattice fcc 4.8105 origin 0.5 0.5 0.5 # O anion lattice shifted by half a unit cell
|
||||
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
|
||||
create_atoms 2 box
|
||||
Created 256 atoms
|
||||
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
|
||||
create_atoms CPU = 0.000 seconds
|
||||
|
||||
mass 1 40.078
|
||||
mass 2 15.999
|
||||
|
||||
displace_atoms all random 0.01 0.01 0.01 9084544
|
||||
Displacing atoms ...
|
||||
set type 1 charge 1.2
|
||||
Setting atom values ...
|
||||
256 settings made for charge
|
||||
set type 2 charge -1.2
|
||||
Setting atom values ...
|
||||
256 settings made for charge
|
||||
|
||||
timestep 0.002
|
||||
neigh_modify delay 5 every 1 check yes
|
||||
|
||||
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
|
||||
kspace_style pppm 1.0e-6
|
||||
|
||||
pair_coeff * * coul/long
|
||||
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
|
||||
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
|
||||
|
||||
variable len equal lx*0.25
|
||||
thermo_style custom step v_len lx pe press
|
||||
thermo 100
|
||||
fix 1 all box/relax iso 0.0
|
||||
minimize 0.0 0.0 1000 10000
|
||||
Switching to 'neigh_modify every 1 delay 0 check yes' setting during minimization
|
||||
PPPM initialization ...
|
||||
using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
|
||||
G vector (1/distance) = 0.23676226
|
||||
grid = 24 24 24
|
||||
stencil order = 5
|
||||
estimated absolute RMS force accuracy = 1.3089053e-05
|
||||
estimated relative force accuracy = 9.089844e-07
|
||||
using double precision FFTW3
|
||||
3d grid and FFT values/proc = 29791 13824
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 17
|
||||
ghost atom cutoff = 17
|
||||
binsize = 8.5, bins = 3 3 3
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair pedone, perpetual, skip from (2)
|
||||
attributes: half, newton on
|
||||
pair build: skip
|
||||
stencil: none
|
||||
bin: none
|
||||
(2) pair coul/long, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton
|
||||
stencil: half/bin/3d
|
||||
bin: standard
|
||||
WARNING: Energy due to 1 extra global DOFs will be included in minimizer energies
|
||||
(src/min.cpp:219)
|
||||
Per MPI rank memory allocation (min/avg/max) = 10.33 | 10.33 | 10.33 Mbytes
|
||||
Step v_len Lx PotEng Press
|
||||
0 4.8105 19.242 -3765.9116 -21299.914
|
||||
100 4.7797128 19.118851 -3767.814 -164.13101
|
||||
200 4.7787507 19.115003 -3769.1366 -373.58797
|
||||
300 4.7768265 19.107306 -3770.5634 48.944709
|
||||
400 4.7768265 19.107306 -3770.9879 -258.56116
|
||||
500 4.7758644 19.103458 -3771.3898 173.91894
|
||||
600 4.7758644 19.103458 -3771.7586 -91.813678
|
||||
700 4.7758644 19.103458 -3771.9842 -252.52883
|
||||
800 4.7749023 19.099609 -3772.3526 216.83318
|
||||
857 4.7747927 19.099171 -3772.8223 32.586251
|
||||
Loop time of 18.0592 on 1 procs for 857 steps with 512 atoms
|
||||
|
||||
99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
Minimization stats:
|
||||
Stopping criterion = linesearch alpha is zero
|
||||
Energy initial, next-to-last, final =
|
||||
-3765.91161156884 -3772.82226663623 -3772.82226663623
|
||||
Force two-norm initial, final = 284.3967 0.46963871
|
||||
Force max component initial, final = 284.14458 0.42827677
|
||||
Final line search alpha, max atom move = 2.8580337e-08 1.2240294e-08
|
||||
Iterations, force evaluations = 857 894
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 13.907 | 13.907 | 13.907 | 0.0 | 77.01
|
||||
Kspace | 1.3809 | 1.3809 | 1.3809 | 0.0 | 7.65
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.045871 | 0.045871 | 0.045871 | 0.0 | 0.25
|
||||
Output | 0.0002809 | 0.0002809 | 0.0002809 | 0.0 | 0.00
|
||||
Modify | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Other | | 2.726 | | | 15.09
|
||||
|
||||
Nlocal: 512 ave 512 max 512 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 11655 ave 11655 max 11655 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 372155 ave 372155 max 372155 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 372155
|
||||
Ave neighs/atom = 726.86523
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
|
||||
print "Expected lattice parameter: 4.7748, computed: $(v_len:%6.4f)"
|
||||
Expected lattice parameter: 4.7748, computed: 4.7748
|
||||
Total wall time: 0:00:18
|
||||
134
examples/PACKAGES/pedone/log.9Apr24.pedone.relax.g++.4
Normal file
134
examples/PACKAGES/pedone/log.9Apr24.pedone.relax.g++.4
Normal file
@ -0,0 +1,134 @@
|
||||
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-373-g7ac84e18dd)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Ca-O crystal with Pedone potential
|
||||
|
||||
units metal
|
||||
atom_style charge
|
||||
|
||||
lattice fcc 4.8105 # experimental lattice parameter for fcc-lattice Ca cations
|
||||
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
|
||||
|
||||
region box block 0 4 0 4 0 4
|
||||
create_box 2 box
|
||||
Created orthogonal box = (0 0 0) to (19.242 19.242 19.242)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 256 atoms
|
||||
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
|
||||
create_atoms CPU = 0.000 seconds
|
||||
|
||||
lattice fcc 4.8105 origin 0.5 0.5 0.5 # O anion lattice shifted by half a unit cell
|
||||
Lattice spacing in x,y,z = 4.8105 4.8105 4.8105
|
||||
create_atoms 2 box
|
||||
Created 256 atoms
|
||||
using lattice units in orthogonal box = (0 0 0) to (19.242 19.242 19.242)
|
||||
create_atoms CPU = 0.000 seconds
|
||||
|
||||
mass 1 40.078
|
||||
mass 2 15.999
|
||||
|
||||
displace_atoms all random 0.01 0.01 0.01 9084544
|
||||
Displacing atoms ...
|
||||
set type 1 charge 1.2
|
||||
Setting atom values ...
|
||||
256 settings made for charge
|
||||
set type 2 charge -1.2
|
||||
Setting atom values ...
|
||||
256 settings made for charge
|
||||
|
||||
timestep 0.002
|
||||
neigh_modify delay 5 every 1 check yes
|
||||
|
||||
pair_style hybrid/overlay pedone 15.0 coul/long 15.0
|
||||
kspace_style pppm 1.0e-6
|
||||
|
||||
pair_coeff * * coul/long
|
||||
pair_coeff 1 2 pedone 0.030211 2.241334 2.923245 5.0
|
||||
pair_coeff 2 2 pedone 0.042395 1.379316 3.618701 22.0
|
||||
|
||||
variable len equal lx*0.25
|
||||
thermo_style custom step v_len lx pe press
|
||||
thermo 100
|
||||
fix 1 all box/relax iso 0.0
|
||||
minimize 0.0 0.0 1000 10000
|
||||
Switching to 'neigh_modify every 1 delay 0 check yes' setting during minimization
|
||||
PPPM initialization ...
|
||||
using 12-bit tables for long-range coulomb (src/kspace.cpp:342)
|
||||
G vector (1/distance) = 0.23676226
|
||||
grid = 24 24 24
|
||||
stencil order = 5
|
||||
estimated absolute RMS force accuracy = 1.3089053e-05
|
||||
estimated relative force accuracy = 9.089844e-07
|
||||
using double precision FFTW3
|
||||
3d grid and FFT values/proc = 11191 3456
|
||||
Generated 0 of 1 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 0 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 17
|
||||
ghost atom cutoff = 17
|
||||
binsize = 8.5, bins = 3 3 3
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair pedone, perpetual, skip from (2)
|
||||
attributes: half, newton on
|
||||
pair build: skip
|
||||
stencil: none
|
||||
bin: none
|
||||
(2) pair coul/long, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton
|
||||
stencil: half/bin/3d
|
||||
bin: standard
|
||||
WARNING: Energy due to 1 extra global DOFs will be included in minimizer energies
|
||||
(src/min.cpp:219)
|
||||
Per MPI rank memory allocation (min/avg/max) = 6.44 | 6.44 | 6.44 Mbytes
|
||||
Step v_len Lx PotEng Press
|
||||
0 4.8105 19.242 -3765.9116 -21299.914
|
||||
100 4.7797128 19.118851 -3767.814 -164.13101
|
||||
200 4.7787507 19.115003 -3769.1367 -373.59489
|
||||
300 4.7768265 19.107306 -3770.5868 32.046893
|
||||
400 4.7768265 19.107306 -3771.0322 -290.69703
|
||||
500 4.7758644 19.103458 -3771.4223 150.34606
|
||||
600 4.7758644 19.103458 -3771.7941 -117.26938
|
||||
700 4.7758644 19.103458 -3772.0193 -277.34372
|
||||
800 4.7749023 19.099609 -3772.42 171.95177
|
||||
860 4.7748339 19.099336 -3772.8237 1.0976356
|
||||
Loop time of 5.65601 on 4 procs for 860 steps with 512 atoms
|
||||
|
||||
99.5% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
Minimization stats:
|
||||
Stopping criterion = linesearch alpha is zero
|
||||
Energy initial, next-to-last, final =
|
||||
-3765.91161156888 -3772.82365446552 -3772.82365446552
|
||||
Force two-norm initial, final = 284.3967 0.067746634
|
||||
Force max component initial, final = 284.14458 0.014426328
|
||||
Final line search alpha, max atom move = 1.9073486e-06 2.7516038e-08
|
||||
Iterations, force evaluations = 860 922
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 3.7408 | 3.8442 | 4.0543 | 6.5 | 67.97
|
||||
Kspace | 0.60187 | 0.81211 | 0.91543 | 14.1 | 14.36
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.14969 | 0.15017 | 0.15071 | 0.1 | 2.66
|
||||
Output | 0.00019203 | 0.00020711 | 0.0002511 | 0.0 | 0.00
|
||||
Modify | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Other | | 0.8494 | | | 15.02
|
||||
|
||||
Nlocal: 128 ave 135 max 123 min
|
||||
Histogram: 1 0 1 0 1 0 0 0 0 1
|
||||
Nghost: 8175 ave 8180 max 8168 min
|
||||
Histogram: 1 0 0 0 0 1 0 1 0 1
|
||||
Neighs: 93038.8 ave 98164 max 89373 min
|
||||
Histogram: 1 0 1 0 1 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 372155
|
||||
Ave neighs/atom = 726.86523
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
|
||||
print "Expected lattice parameter: 4.7748, computed: $(v_len:%6.4f)"
|
||||
Expected lattice parameter: 4.7748, computed: 4.7748
|
||||
Total wall time: 0:00:05
|
||||
@ -1,260 +0,0 @@
|
||||
LAMMPS (29 Mar 2019)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# 2d micelle simulation
|
||||
|
||||
dimension 2
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 5
|
||||
|
||||
atom_style bond
|
||||
|
||||
# Soft potential push-off
|
||||
|
||||
read_data data.micelle
|
||||
orthogonal box = (0 0 -0.1) to (35.8569 35.8569 0.1)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
1200 atoms
|
||||
scanning bonds ...
|
||||
1 = max bonds/atom
|
||||
reading bonds ...
|
||||
300 bonds
|
||||
2 = max # of 1-2 neighbors
|
||||
1 = max # of 1-3 neighbors
|
||||
1 = max # of 1-4 neighbors
|
||||
2 = max # of special neighbors
|
||||
special bonds CPU = 0.000473022 secs
|
||||
read_data CPU = 0.0024147 secs
|
||||
special_bonds fene
|
||||
2 = max # of 1-2 neighbors
|
||||
2 = max # of special neighbors
|
||||
special bonds CPU = 0.00022316 secs
|
||||
|
||||
pair_style soft 1.12246
|
||||
pair_coeff * * 0.0 1.12246
|
||||
|
||||
bond_style harmonic
|
||||
bond_coeff 1 50.0 0.75
|
||||
|
||||
velocity all create 0.45 2349852
|
||||
|
||||
variable prefactor equal ramp(1.0,20.0)
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all temp/rescale 100 0.45 0.45 0.02 1.0
|
||||
fix 3 all adapt 1 pair soft a * * v_prefactor
|
||||
fix 4 all enforce2d
|
||||
|
||||
thermo 50
|
||||
run 500
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 5 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 1.42246
|
||||
ghost atom cutoff = 1.42246
|
||||
binsize = 0.71123, bins = 51 51 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair soft, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/2d/newton
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 3.799 | 3.799 | 3.799 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0.45 0.40003481 2.2200223e-06 0.84966203 0.78952518
|
||||
50 0.54981866 0.93548899 0.068440043 1.5532895 1.9232786
|
||||
100 0.45 0.99659327 0.079228519 1.5254468 3.2135679
|
||||
150 0.86965411 0.90456016 0.07493355 1.8484231 4.3821925
|
||||
200 0.45 1.01454 0.10663502 1.5708 4.7598476
|
||||
250 0.79636561 0.82567712 0.12105337 1.7424325 5.4983899
|
||||
300 0.45 0.86475538 0.11819875 1.4325791 5.8554758
|
||||
350 0.72135464 0.70693069 0.10912636 1.5368106 6.0388247
|
||||
400 0.45 0.75067331 0.14165013 1.3419484 6.3840708
|
||||
450 0.64839221 0.62402486 0.14173679 1.4136135 6.4791009
|
||||
500 0.45 0.66669513 0.13695201 1.2532721 6.807146
|
||||
Loop time of 0.103162 on 1 procs for 500 steps with 1200 atoms
|
||||
|
||||
Performance: 2093802.885 tau/day, 4846.766 timesteps/s
|
||||
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.068308 | 0.068308 | 0.068308 | 0.0 | 66.21
|
||||
Bond | 0.004235 | 0.004235 | 0.004235 | 0.0 | 4.11
|
||||
Neigh | 0.014069 | 0.014069 | 0.014069 | 0.0 | 13.64
|
||||
Comm | 0.0019219 | 0.0019219 | 0.0019219 | 0.0 | 1.86
|
||||
Output | 0.00017262 | 0.00017262 | 0.00017262 | 0.0 | 0.17
|
||||
Modify | 0.011728 | 0.011728 | 0.011728 | 0.0 | 11.37
|
||||
Other | | 0.002726 | | | 2.64
|
||||
|
||||
Nlocal: 1200 ave 1200 max 1200 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 197 ave 197 max 197 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 3094 ave 3094 max 3094 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 3094
|
||||
Ave neighs/atom = 2.57833
|
||||
Ave special neighs/atom = 0.5
|
||||
Neighbor list builds = 52
|
||||
Dangerous builds = 0
|
||||
|
||||
unfix 3
|
||||
|
||||
# Main run
|
||||
|
||||
pair_style lj/cut 2.5
|
||||
|
||||
# solvent/head - full-size and long-range
|
||||
|
||||
pair_coeff 1 1 1.0 1.0 2.5
|
||||
pair_coeff 2 2 1.0 1.0 2.5
|
||||
pair_coeff 1 2 1.0 1.0 2.5
|
||||
|
||||
# tail/tail - size-averaged and long-range
|
||||
|
||||
pair_coeff 3 3 1.0 0.75 2.5
|
||||
pair_coeff 4 4 1.0 0.50 2.5
|
||||
pair_coeff 3 4 1.0 0.67 2.5
|
||||
|
||||
# solvent/tail - full-size and repulsive
|
||||
|
||||
pair_coeff 1 3 1.0 1.0 1.12246
|
||||
pair_coeff 1 4 1.0 1.0 1.12246
|
||||
|
||||
# head/tail - size-averaged and repulsive
|
||||
|
||||
pair_coeff 2 3 1.0 0.88 1.12246
|
||||
pair_coeff 2 4 1.0 0.75 1.12246
|
||||
|
||||
thermo 50
|
||||
|
||||
#dump 1 all atom 2000 dump.micelle
|
||||
|
||||
#dump 2 all image 2000 image.*.jpg type type zoom 1.6
|
||||
#dump_modify 2 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
|
||||
|
||||
#dump 3 all movie 2000 movie.mpg type type zoom 1.6
|
||||
#dump_modify 3 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
|
||||
|
||||
reset_timestep 0
|
||||
group solvent molecule 0
|
||||
750 atoms in group solvent
|
||||
group solute subtract all solvent
|
||||
450 atoms in group solute
|
||||
unfix 1
|
||||
unfix 2
|
||||
unfix 4
|
||||
fix 1 solvent nve
|
||||
fix 2 solvent temp/rescale 100 0.45 0.45 0.02 1.0
|
||||
fix 5 solute rigid molecule langevin 0.45 0.45 0.5 112211
|
||||
150 rigid bodies with 450 atoms
|
||||
fix 4 all enforce2d
|
||||
run 500
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 5 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2.8
|
||||
ghost atom cutoff = 2.8
|
||||
binsize = 1.4, bins = 26 26 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair lj/cut, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/2d/newton
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 5.274 | 5.274 | 5.274 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0.45318168 -1.3753652 0.13695201 -0.8705807 1.975423
|
||||
50 0.77871641 -1.6955252 0.13695201 -0.92651507 0.64222539
|
||||
100 0.5336062 -1.7124572 0.13695201 -1.1423948 -0.11959696
|
||||
150 0.58789067 -1.7926109 0.13695201 -1.1784877 1.2592743
|
||||
200 0.47864796 -1.8040298 0.13695201 -1.2785752 3.6739793
|
||||
250 0.51124651 -1.8614797 0.13695201 -1.309566 2.5817722
|
||||
300 0.45695639 -1.8708384 0.13695201 -1.3629901 3.0833794
|
||||
350 0.477504 -1.8924359 0.13695201 -1.3679098 -5.1605926
|
||||
400 0.45328205 -1.87754 0.13695201 -1.372674 -4.0355858
|
||||
450 0.47465031 -1.9071924 0.13695201 -1.3849826 3.1949617
|
||||
500 0.45533691 -1.9072316 0.13695201 -1.4006978 0.48079061
|
||||
Loop time of 0.178806 on 1 procs for 500 steps with 1200 atoms
|
||||
|
||||
Performance: 1208012.705 tau/day, 2796.326 timesteps/s
|
||||
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.086131 | 0.086131 | 0.086131 | 0.0 | 48.17
|
||||
Bond | 0.0042472 | 0.0042472 | 0.0042472 | 0.0 | 2.38
|
||||
Neigh | 0.021317 | 0.021317 | 0.021317 | 0.0 | 11.92
|
||||
Comm | 0.0025985 | 0.0025985 | 0.0025985 | 0.0 | 1.45
|
||||
Output | 0.000175 | 0.000175 | 0.000175 | 0.0 | 0.10
|
||||
Modify | 0.061408 | 0.061408 | 0.061408 | 0.0 | 34.34
|
||||
Other | | 0.00293 | | | 1.64
|
||||
|
||||
Nlocal: 1200 ave 1200 max 1200 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 416 ave 416 max 416 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 8769 ave 8769 max 8769 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 8769
|
||||
Ave neighs/atom = 7.3075
|
||||
Ave special neighs/atom = 0.5
|
||||
Neighbor list builds = 47
|
||||
Dangerous builds = 2
|
||||
unfix 2
|
||||
unfix 4
|
||||
unfix 5
|
||||
fix 5 solute rigid/small molecule
|
||||
create bodies CPU = 0.00015378 secs
|
||||
150 rigid bodies with 450 atoms
|
||||
1.30435 = max distance from body owner to body atom
|
||||
fix 4 all enforce2d
|
||||
run 500
|
||||
Per MPI rank memory allocation (min/avg/max) = 8.64 | 8.64 | 8.64 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
500 0.45533691 -1.9072316 0.13695201 -1.4006978 2.4545793
|
||||
550 0.45627282 -1.912409 0.13695201 -1.4051155 2.1845065
|
||||
600 0.44734553 -1.8890695 0.13695201 -1.389022 2.3458965
|
||||
650 0.46444648 -1.9042462 0.13695201 -1.3903185 2.1609319
|
||||
700 0.47113236 -1.8977576 0.13695201 -1.3784032 2.2420351
|
||||
750 0.48554548 -1.9253545 0.13695201 -1.3943015 2.143907
|
||||
800 0.46350091 -1.8865749 0.13695201 -1.3734146 2.294431
|
||||
850 0.4766104 -1.9094039 0.13695201 -1.3856031 2.2077157
|
||||
900 0.48988467 -1.9051538 0.13695201 -1.3705787 2.0107056
|
||||
950 0.48351943 -1.9162485 0.13695201 -1.3868399 2.1891332
|
||||
1000 0.49033701 -1.9115165 0.13695201 -1.3765742 2.1508141
|
||||
Loop time of 0.166502 on 1 procs for 500 steps with 1200 atoms
|
||||
|
||||
Performance: 1297278.008 tau/day, 3002.958 timesteps/s
|
||||
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.085767 | 0.085767 | 0.085767 | 0.0 | 51.51
|
||||
Bond | 0.0042562 | 0.0042562 | 0.0042562 | 0.0 | 2.56
|
||||
Neigh | 0.018039 | 0.018039 | 0.018039 | 0.0 | 10.83
|
||||
Comm | 0.0024002 | 0.0024002 | 0.0024002 | 0.0 | 1.44
|
||||
Output | 0.00018239 | 0.00018239 | 0.00018239 | 0.0 | 0.11
|
||||
Modify | 0.052717 | 0.052717 | 0.052717 | 0.0 | 31.66
|
||||
Other | | 0.003141 | | | 1.89
|
||||
|
||||
Nlocal: 1200 ave 1200 max 1200 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 415 ave 415 max 415 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 8743 ave 8743 max 8743 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 8743
|
||||
Ave neighs/atom = 7.28583
|
||||
Ave special neighs/atom = 0.5
|
||||
Neighbor list builds = 40
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:00
|
||||
@ -1,260 +0,0 @@
|
||||
LAMMPS (29 Mar 2019)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# 2d micelle simulation
|
||||
|
||||
dimension 2
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 5
|
||||
|
||||
atom_style bond
|
||||
|
||||
# Soft potential push-off
|
||||
|
||||
read_data data.micelle
|
||||
orthogonal box = (0 0 -0.1) to (35.8569 35.8569 0.1)
|
||||
2 by 2 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
1200 atoms
|
||||
scanning bonds ...
|
||||
1 = max bonds/atom
|
||||
reading bonds ...
|
||||
300 bonds
|
||||
2 = max # of 1-2 neighbors
|
||||
1 = max # of 1-3 neighbors
|
||||
1 = max # of 1-4 neighbors
|
||||
2 = max # of special neighbors
|
||||
special bonds CPU = 0.000422001 secs
|
||||
read_data CPU = 0.00473404 secs
|
||||
special_bonds fene
|
||||
2 = max # of 1-2 neighbors
|
||||
2 = max # of special neighbors
|
||||
special bonds CPU = 0.000183344 secs
|
||||
|
||||
pair_style soft 1.12246
|
||||
pair_coeff * * 0.0 1.12246
|
||||
|
||||
bond_style harmonic
|
||||
bond_coeff 1 50.0 0.75
|
||||
|
||||
velocity all create 0.45 2349852
|
||||
|
||||
variable prefactor equal ramp(1.0,20.0)
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all temp/rescale 100 0.45 0.45 0.02 1.0
|
||||
fix 3 all adapt 1 pair soft a * * v_prefactor
|
||||
fix 4 all enforce2d
|
||||
|
||||
thermo 50
|
||||
run 500
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 5 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 1.42246
|
||||
ghost atom cutoff = 1.42246
|
||||
binsize = 0.71123, bins = 51 51 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair soft, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/2d/newton
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 3.758 | 3.85 | 4.126 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0.45 0.40003481 2.2200223e-06 0.84966203 0.78952518
|
||||
50 0.54981866 0.93548899 0.068440043 1.5532895 1.9232786
|
||||
100 0.45 0.99659327 0.079228519 1.5254468 3.2135679
|
||||
150 0.86965411 0.90456016 0.07493355 1.8484231 4.3821925
|
||||
200 0.45 1.01454 0.10663502 1.5708 4.7598476
|
||||
250 0.79636561 0.82567712 0.12105337 1.7424325 5.4983899
|
||||
300 0.45 0.86475538 0.11819875 1.4325791 5.8554758
|
||||
350 0.72135464 0.70693069 0.10912636 1.5368106 6.0388247
|
||||
400 0.45 0.75067331 0.14165013 1.3419484 6.3840708
|
||||
450 0.64839221 0.62402486 0.14173679 1.4136135 6.4791009
|
||||
500 0.45 0.66669513 0.13695201 1.2532721 6.807146
|
||||
Loop time of 0.0426326 on 4 procs for 500 steps with 1200 atoms
|
||||
|
||||
Performance: 5066547.720 tau/day, 11728.120 timesteps/s
|
||||
98.7% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.016784 | 0.019254 | 0.022154 | 1.5 | 45.16
|
||||
Bond | 0.0010612 | 0.0012558 | 0.0014153 | 0.4 | 2.95
|
||||
Neigh | 0.0046048 | 0.0046697 | 0.0047245 | 0.1 | 10.95
|
||||
Comm | 0.0064592 | 0.0097114 | 0.012527 | 2.4 | 22.78
|
||||
Output | 0.00022507 | 0.00026393 | 0.00033951 | 0.0 | 0.62
|
||||
Modify | 0.0041659 | 0.0048084 | 0.0053945 | 0.8 | 11.28
|
||||
Other | | 0.002669 | | | 6.26
|
||||
|
||||
Nlocal: 300 ave 304 max 292 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 2 1
|
||||
Nghost: 103.5 ave 108 max 98 min
|
||||
Histogram: 1 0 0 1 0 0 0 0 0 2
|
||||
Neighs: 773.5 ave 792 max 735 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 2 1
|
||||
|
||||
Total # of neighbors = 3094
|
||||
Ave neighs/atom = 2.57833
|
||||
Ave special neighs/atom = 0.5
|
||||
Neighbor list builds = 52
|
||||
Dangerous builds = 0
|
||||
|
||||
unfix 3
|
||||
|
||||
# Main run
|
||||
|
||||
pair_style lj/cut 2.5
|
||||
|
||||
# solvent/head - full-size and long-range
|
||||
|
||||
pair_coeff 1 1 1.0 1.0 2.5
|
||||
pair_coeff 2 2 1.0 1.0 2.5
|
||||
pair_coeff 1 2 1.0 1.0 2.5
|
||||
|
||||
# tail/tail - size-averaged and long-range
|
||||
|
||||
pair_coeff 3 3 1.0 0.75 2.5
|
||||
pair_coeff 4 4 1.0 0.50 2.5
|
||||
pair_coeff 3 4 1.0 0.67 2.5
|
||||
|
||||
# solvent/tail - full-size and repulsive
|
||||
|
||||
pair_coeff 1 3 1.0 1.0 1.12246
|
||||
pair_coeff 1 4 1.0 1.0 1.12246
|
||||
|
||||
# head/tail - size-averaged and repulsive
|
||||
|
||||
pair_coeff 2 3 1.0 0.88 1.12246
|
||||
pair_coeff 2 4 1.0 0.75 1.12246
|
||||
|
||||
thermo 50
|
||||
|
||||
#dump 1 all atom 2000 dump.micelle
|
||||
|
||||
#dump 2 all image 2000 image.*.jpg type type zoom 1.6
|
||||
#dump_modify 2 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
|
||||
|
||||
#dump 3 all movie 2000 movie.mpg type type zoom 1.6
|
||||
#dump_modify 3 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
|
||||
|
||||
reset_timestep 0
|
||||
group solvent molecule 0
|
||||
750 atoms in group solvent
|
||||
group solute subtract all solvent
|
||||
450 atoms in group solute
|
||||
unfix 1
|
||||
unfix 2
|
||||
unfix 4
|
||||
fix 1 solvent nve
|
||||
fix 2 solvent temp/rescale 100 0.45 0.45 0.02 1.0
|
||||
fix 5 solute rigid molecule langevin 0.45 0.45 0.5 112211
|
||||
150 rigid bodies with 450 atoms
|
||||
fix 4 all enforce2d
|
||||
run 500
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 5 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2.8
|
||||
ghost atom cutoff = 2.8
|
||||
binsize = 1.4, bins = 26 26 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair lj/cut, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/2d/newton
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 5.251 | 5.282 | 5.374 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0.45318168 -1.3753652 0.13695201 -0.8705807 1.975423
|
||||
50 0.77871641 -1.6955252 0.13695201 -0.92651507 0.64222539
|
||||
100 0.5336062 -1.7124572 0.13695201 -1.1423948 -0.11959696
|
||||
150 0.58789067 -1.7926109 0.13695201 -1.1784877 1.2592743
|
||||
200 0.47864796 -1.8040298 0.13695201 -1.2785752 3.6739793
|
||||
250 0.51124651 -1.8614797 0.13695201 -1.309566 2.5817722
|
||||
300 0.45695639 -1.8708384 0.13695201 -1.3629901 3.0833794
|
||||
350 0.477504 -1.8924359 0.13695201 -1.3679098 -5.1605926
|
||||
400 0.45328205 -1.87754 0.13695201 -1.372674 -4.0355858
|
||||
450 0.47465031 -1.9071924 0.13695201 -1.3849826 3.1949617
|
||||
500 0.45533691 -1.9072316 0.13695201 -1.4006978 0.48079061
|
||||
Loop time of 0.0887392 on 4 procs for 500 steps with 1200 atoms
|
||||
|
||||
Performance: 2434100.210 tau/day, 5634.491 timesteps/s
|
||||
98.9% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.022611 | 0.022839 | 0.023082 | 0.1 | 25.74
|
||||
Bond | 0.0010793 | 0.0011569 | 0.0012515 | 0.2 | 1.30
|
||||
Neigh | 0.0064609 | 0.0064996 | 0.0065265 | 0.0 | 7.32
|
||||
Comm | 0.0071712 | 0.0073687 | 0.0077734 | 0.3 | 8.30
|
||||
Output | 0.00023389 | 0.00025356 | 0.00030327 | 0.0 | 0.29
|
||||
Modify | 0.047258 | 0.047683 | 0.048503 | 0.2 | 53.73
|
||||
Other | | 0.002938 | | | 3.31
|
||||
|
||||
Nlocal: 300 ave 309 max 291 min
|
||||
Histogram: 1 0 0 1 0 0 1 0 0 1
|
||||
Nghost: 218.75 ave 223 max 216 min
|
||||
Histogram: 1 0 2 0 0 0 0 0 0 1
|
||||
Neighs: 2192.25 ave 2251 max 2113 min
|
||||
Histogram: 1 0 0 1 0 0 0 0 0 2
|
||||
|
||||
Total # of neighbors = 8769
|
||||
Ave neighs/atom = 7.3075
|
||||
Ave special neighs/atom = 0.5
|
||||
Neighbor list builds = 47
|
||||
Dangerous builds = 2
|
||||
unfix 2
|
||||
unfix 4
|
||||
unfix 5
|
||||
fix 5 solute rigid/small molecule
|
||||
create bodies CPU = 7.70092e-05 secs
|
||||
150 rigid bodies with 450 atoms
|
||||
1.30435 = max distance from body owner to body atom
|
||||
fix 4 all enforce2d
|
||||
run 500
|
||||
Per MPI rank memory allocation (min/avg/max) = 8.565 | 8.597 | 8.69 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
500 0.45533691 -1.9072316 0.13695201 -1.4006978 2.4545793
|
||||
550 0.45627282 -1.912409 0.13695201 -1.4051155 2.1845065
|
||||
600 0.44734553 -1.8890695 0.13695201 -1.389022 2.3458965
|
||||
650 0.46444648 -1.9042462 0.13695201 -1.3903185 2.1609319
|
||||
700 0.47113236 -1.8977576 0.13695201 -1.3784032 2.2420351
|
||||
750 0.48554548 -1.9253545 0.13695201 -1.3943015 2.143907
|
||||
800 0.46350091 -1.8865749 0.13695201 -1.3734146 2.294431
|
||||
850 0.4766104 -1.9094039 0.13695201 -1.3856031 2.2077157
|
||||
900 0.48988467 -1.9051538 0.13695201 -1.3705787 2.0107056
|
||||
950 0.48351942 -1.9162485 0.13695201 -1.3868399 2.1891332
|
||||
1000 0.490337 -1.9115164 0.13695201 -1.3765742 2.1508141
|
||||
Loop time of 0.0588261 on 4 procs for 500 steps with 1200 atoms
|
||||
|
||||
Performance: 3671840.233 tau/day, 8499.630 timesteps/s
|
||||
98.3% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.022407 | 0.022631 | 0.0229 | 0.1 | 38.47
|
||||
Bond | 0.0010669 | 0.0011355 | 0.0012124 | 0.2 | 1.93
|
||||
Neigh | 0.0052333 | 0.00528 | 0.0053182 | 0.0 | 8.98
|
||||
Comm | 0.0063677 | 0.0066406 | 0.0068488 | 0.2 | 11.29
|
||||
Output | 0.00023055 | 0.00024778 | 0.00028086 | 0.0 | 0.42
|
||||
Modify | 0.020577 | 0.020651 | 0.020834 | 0.1 | 35.11
|
||||
Other | | 0.00224 | | | 3.81
|
||||
|
||||
Nlocal: 300 ave 303 max 295 min
|
||||
Histogram: 1 0 0 0 0 0 1 0 1 1
|
||||
Nghost: 219 ave 224 max 215 min
|
||||
Histogram: 1 0 0 1 1 0 0 0 0 1
|
||||
Neighs: 2185.75 ave 2244 max 2143 min
|
||||
Histogram: 1 1 0 0 0 1 0 0 0 1
|
||||
|
||||
Total # of neighbors = 8743
|
||||
Ave neighs/atom = 7.28583
|
||||
Ave special neighs/atom = 0.5
|
||||
Neighbor list builds = 40
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:00
|
||||
271
examples/micelle/log.4Apr2024.micelle-rigid.g++.1
Normal file
271
examples/micelle/log.4Apr2024.micelle-rigid.g++.1
Normal file
@ -0,0 +1,271 @@
|
||||
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-345-g506bf886ee-modified)
|
||||
# 2d micelle simulation
|
||||
|
||||
dimension 2
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 5
|
||||
|
||||
atom_style bond
|
||||
|
||||
# Soft potential push-off
|
||||
|
||||
read_data data.micelle
|
||||
Reading data file ...
|
||||
orthogonal box = (0 0 -0.1) to (35.85686 35.85686 0.1)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
1200 atoms
|
||||
scanning bonds ...
|
||||
1 = max bonds/atom
|
||||
reading bonds ...
|
||||
300 bonds
|
||||
Finding 1-2 1-3 1-4 neighbors ...
|
||||
special bond factors lj: 0 0 0
|
||||
special bond factors coul: 0 0 0
|
||||
2 = max # of 1-2 neighbors
|
||||
1 = max # of 1-3 neighbors
|
||||
1 = max # of 1-4 neighbors
|
||||
2 = max # of special neighbors
|
||||
special bonds CPU = 0.000 seconds
|
||||
read_data CPU = 0.005 seconds
|
||||
special_bonds fene
|
||||
Finding 1-2 1-3 1-4 neighbors ...
|
||||
special bond factors lj: 0 1 1
|
||||
special bond factors coul: 0 1 1
|
||||
2 = max # of 1-2 neighbors
|
||||
2 = max # of special neighbors
|
||||
special bonds CPU = 0.000 seconds
|
||||
|
||||
pair_style soft 1.12246
|
||||
pair_coeff * * 0.0 1.12246
|
||||
|
||||
bond_style harmonic
|
||||
bond_coeff 1 50.0 0.75
|
||||
|
||||
velocity all create 0.45 2349852
|
||||
|
||||
variable prefactor equal ramp(1.0,20.0)
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all temp/rescale 100 0.45 0.45 0.02 1.0
|
||||
fix 3 all adapt 1 pair soft a * * v_prefactor
|
||||
fix 4 all enforce2d
|
||||
|
||||
thermo 50
|
||||
run 500
|
||||
Generated 0 of 6 mixed pair_coeff terms from geometric mixing rule
|
||||
WARNING: Communication cutoff 1.42246 is shorter than a bond length based estimate of 1.425. This may lead to errors. (../comm.cpp:730)
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 5 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 1.42246
|
||||
ghost atom cutoff = 1.42246
|
||||
binsize = 0.71123, bins = 51 51 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair soft, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/2d
|
||||
bin: standard
|
||||
WARNING: Communication cutoff 1.42246 is shorter than a bond length based estimate of 1.425. This may lead to errors. (../comm.cpp:730)
|
||||
Per MPI rank memory allocation (min/avg/max) = 4.148 | 4.148 | 4.148 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0.45 0.40003481 2.2200223e-06 0.84966203 0.78952518
|
||||
50 0.54981866 0.93548899 0.068440043 1.5532895 1.9232786
|
||||
100 0.45 0.99659327 0.079228519 1.5254468 3.2135679
|
||||
150 0.86965411 0.90456016 0.07493355 1.8484231 4.3821925
|
||||
200 0.45 1.01454 0.10663502 1.5708 4.7598476
|
||||
250 0.79636561 0.82567712 0.12105337 1.7424325 5.4983899
|
||||
300 0.45 0.86475538 0.11819875 1.4325791 5.8554758
|
||||
350 0.72135464 0.70693069 0.10912636 1.5368106 6.0388247
|
||||
400 0.45 0.75067331 0.14165013 1.3419484 6.3840708
|
||||
450 0.64839221 0.62402486 0.14173679 1.4136135 6.4791009
|
||||
500 0.45 0.66669513 0.13695201 1.2532721 6.807146
|
||||
Loop time of 0.0365221 on 1 procs for 500 steps with 1200 atoms
|
||||
|
||||
Performance: 5914221.123 tau/day, 13690.327 timesteps/s, 16.428 Matom-step/s
|
||||
89.2% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.022939 | 0.022939 | 0.022939 | 0.0 | 62.81
|
||||
Bond | 0.00073851 | 0.00073851 | 0.00073851 | 0.0 | 2.02
|
||||
Neigh | 0.0078339 | 0.0078339 | 0.0078339 | 0.0 | 21.45
|
||||
Comm | 0.00072134 | 0.00072134 | 0.00072134 | 0.0 | 1.98
|
||||
Output | 7.1419e-05 | 7.1419e-05 | 7.1419e-05 | 0.0 | 0.20
|
||||
Modify | 0.0034868 | 0.0034868 | 0.0034868 | 0.0 | 9.55
|
||||
Other | | 0.0007314 | | | 2.00
|
||||
|
||||
Nlocal: 1200 ave 1200 max 1200 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 197 ave 197 max 197 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 3094 ave 3094 max 3094 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 3094
|
||||
Ave neighs/atom = 2.5783333
|
||||
Ave special neighs/atom = 0.5
|
||||
Neighbor list builds = 52
|
||||
Dangerous builds = 0
|
||||
|
||||
unfix 3
|
||||
|
||||
# Main run
|
||||
|
||||
pair_style lj/cut 2.5
|
||||
|
||||
# solvent/head - full-size and long-range
|
||||
|
||||
pair_coeff 1 1 1.0 1.0 2.5
|
||||
pair_coeff 2 2 1.0 1.0 2.5
|
||||
pair_coeff 1 2 1.0 1.0 2.5
|
||||
|
||||
# tail/tail - size-averaged and long-range
|
||||
|
||||
pair_coeff 3 3 1.0 0.75 2.5
|
||||
pair_coeff 4 4 1.0 0.50 2.5
|
||||
pair_coeff 3 4 1.0 0.67 2.5
|
||||
|
||||
# solvent/tail - full-size and repulsive
|
||||
|
||||
pair_coeff 1 3 1.0 1.0 1.12246
|
||||
pair_coeff 1 4 1.0 1.0 1.12246
|
||||
|
||||
# head/tail - size-averaged and repulsive
|
||||
|
||||
pair_coeff 2 3 1.0 0.88 1.12246
|
||||
pair_coeff 2 4 1.0 0.75 1.12246
|
||||
|
||||
thermo 50
|
||||
|
||||
#dump 1 all atom 2000 dump.micelle
|
||||
|
||||
#dump 2 all image 2000 image.*.jpg type type zoom 1.6
|
||||
#dump_modify 2 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
|
||||
|
||||
#dump 3 all movie 2000 movie.mpg type type zoom 1.6
|
||||
#dump_modify 3 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
|
||||
|
||||
reset_timestep 0
|
||||
group solvent molecule 0
|
||||
750 atoms in group solvent
|
||||
group solute subtract all solvent
|
||||
450 atoms in group solute
|
||||
unfix 1
|
||||
unfix 2
|
||||
unfix 4
|
||||
fix 1 solvent nve
|
||||
fix 2 solvent temp/rescale 100 0.45 0.45 0.02 1.0
|
||||
fix 5 solute rigid molecule langevin 0.45 0.45 0.5 112211
|
||||
150 rigid bodies with 450 atoms
|
||||
fix 4 all enforce2d
|
||||
run 500
|
||||
Generated 0 of 6 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 5 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2.8
|
||||
ghost atom cutoff = 2.8
|
||||
binsize = 1.4, bins = 26 26 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair lj/cut, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/2d
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 5.391 | 5.391 | 5.391 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0.45318168 -1.3753652 0.13695201 -0.8705807 1.975423
|
||||
50 0.77344732 -1.6944083 0.13695201 -0.92967487 0.58657109
|
||||
100 0.53530681 -1.7006195 0.13695201 -1.1291768 0.11219772
|
||||
150 0.60820175 -1.8071581 0.13695201 -1.176549 1.5161796
|
||||
200 0.49410558 -1.7945459 0.13695201 -1.2565449 4.0469262
|
||||
250 0.52460847 -1.8528672 0.13695201 -1.290108 2.9929445
|
||||
300 0.46596803 -1.8680499 0.13695201 -1.3528872 2.7958851
|
||||
350 0.48831812 -1.8723486 0.13695201 -1.3390451 -4.5106818
|
||||
400 0.46798432 -1.9008529 0.13695201 -1.3840536 -4.3096566
|
||||
450 0.46000658 -1.9081144 0.13695201 -1.3977904 3.3360611
|
||||
500 0.45822409 -1.9077531 0.13695201 -1.3988759 0.45428738
|
||||
Loop time of 0.0650638 on 1 procs for 500 steps with 1200 atoms
|
||||
|
||||
Performance: 3319817.322 tau/day, 7684.762 timesteps/s, 9.222 Matom-step/s
|
||||
100.0% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.027565 | 0.027565 | 0.027565 | 0.0 | 42.37
|
||||
Bond | 0.0007043 | 0.0007043 | 0.0007043 | 0.0 | 1.08
|
||||
Neigh | 0.012724 | 0.012724 | 0.012724 | 0.0 | 19.56
|
||||
Comm | 0.00091442 | 0.00091442 | 0.00091442 | 0.0 | 1.41
|
||||
Output | 6.004e-05 | 6.004e-05 | 6.004e-05 | 0.0 | 0.09
|
||||
Modify | 0.022329 | 0.022329 | 0.022329 | 0.0 | 34.32
|
||||
Other | | 0.0007666 | | | 1.18
|
||||
|
||||
Nlocal: 1200 ave 1200 max 1200 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 411 ave 411 max 411 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 8759 ave 8759 max 8759 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 8759
|
||||
Ave neighs/atom = 7.2991667
|
||||
Ave special neighs/atom = 0.5
|
||||
Neighbor list builds = 46
|
||||
Dangerous builds = 2
|
||||
unfix 2
|
||||
unfix 4
|
||||
unfix 5
|
||||
fix 5 solute rigid/small molecule
|
||||
create bodies CPU = 0.000 seconds
|
||||
150 rigid bodies with 450 atoms
|
||||
1.3043524 = max distance from body owner to body atom
|
||||
fix 4 all enforce2d
|
||||
run 500
|
||||
Generated 0 of 6 mixed pair_coeff terms from geometric mixing rule
|
||||
Per MPI rank memory allocation (min/avg/max) = 9.306 | 9.306 | 9.306 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
500 0.45822409 -1.9077531 0.13695201 -1.3988759 2.4509752
|
||||
550 0.46736204 -1.9141964 0.13695201 -1.3979022 2.1695662
|
||||
600 0.47872194 -1.9232781 0.13695201 -1.3977635 2.0058379
|
||||
650 0.47491575 -1.9224109 0.13695201 -1.3999857 2.0637789
|
||||
700 0.44714331 -1.8990682 0.13695201 -1.3991848 2.4863082
|
||||
750 0.49089274 -1.9231004 0.13695201 -1.3877071 2.123147
|
||||
800 0.4753839 -1.8959698 0.13695201 -1.3731645 2.3030481
|
||||
850 0.46870816 -1.8972225 0.13695201 -1.3798357 2.2464703
|
||||
900 0.49610454 -1.9070748 0.13695201 -1.3674513 2.2196388
|
||||
950 0.4773035 -1.8925765 0.13695201 -1.3682132 2.3534786
|
||||
1000 0.50413702 -1.9292393 0.13695201 -1.383096 2.1630988
|
||||
Loop time of 0.0592806 on 1 procs for 500 steps with 1200 atoms
|
||||
|
||||
Performance: 3643690.276 tau/day, 8434.468 timesteps/s, 10.121 Matom-step/s
|
||||
100.0% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.026866 | 0.026866 | 0.026866 | 0.0 | 45.32
|
||||
Bond | 0.00071863 | 0.00071863 | 0.00071863 | 0.0 | 1.21
|
||||
Neigh | 0.010927 | 0.010927 | 0.010927 | 0.0 | 18.43
|
||||
Comm | 0.00084187 | 0.00084187 | 0.00084187 | 0.0 | 1.42
|
||||
Output | 6.8106e-05 | 6.8106e-05 | 6.8106e-05 | 0.0 | 0.11
|
||||
Modify | 0.019075 | 0.019075 | 0.019075 | 0.0 | 32.18
|
||||
Other | | 0.000783 | | | 1.32
|
||||
|
||||
Nlocal: 1200 ave 1200 max 1200 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 417 ave 417 max 417 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 8654 ave 8654 max 8654 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 8654
|
||||
Ave neighs/atom = 7.2116667
|
||||
Ave special neighs/atom = 0.5
|
||||
Neighbor list builds = 39
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:00
|
||||
272
examples/micelle/log.4Apr2024.micelle-rigid.g++.4
Normal file
272
examples/micelle/log.4Apr2024.micelle-rigid.g++.4
Normal file
@ -0,0 +1,272 @@
|
||||
LAMMPS (7 Feb 2024 - Development - patch_7Feb2024_update1-345-g506bf886ee-modified)
|
||||
WARNING: Using I/O redirection is unreliable with parallel runs. Better to use the -in switch to read input files. (../lammps.cpp:551)
|
||||
# 2d micelle simulation
|
||||
|
||||
dimension 2
|
||||
|
||||
neighbor 0.3 bin
|
||||
neigh_modify delay 5
|
||||
|
||||
atom_style bond
|
||||
|
||||
# Soft potential push-off
|
||||
|
||||
read_data data.micelle
|
||||
Reading data file ...
|
||||
orthogonal box = (0 0 -0.1) to (35.85686 35.85686 0.1)
|
||||
2 by 2 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
1200 atoms
|
||||
scanning bonds ...
|
||||
1 = max bonds/atom
|
||||
reading bonds ...
|
||||
300 bonds
|
||||
Finding 1-2 1-3 1-4 neighbors ...
|
||||
special bond factors lj: 0 0 0
|
||||
special bond factors coul: 0 0 0
|
||||
2 = max # of 1-2 neighbors
|
||||
1 = max # of 1-3 neighbors
|
||||
1 = max # of 1-4 neighbors
|
||||
2 = max # of special neighbors
|
||||
special bonds CPU = 0.000 seconds
|
||||
read_data CPU = 0.004 seconds
|
||||
special_bonds fene
|
||||
Finding 1-2 1-3 1-4 neighbors ...
|
||||
special bond factors lj: 0 1 1
|
||||
special bond factors coul: 0 1 1
|
||||
2 = max # of 1-2 neighbors
|
||||
2 = max # of special neighbors
|
||||
special bonds CPU = 0.000 seconds
|
||||
|
||||
pair_style soft 1.12246
|
||||
pair_coeff * * 0.0 1.12246
|
||||
|
||||
bond_style harmonic
|
||||
bond_coeff 1 50.0 0.75
|
||||
|
||||
velocity all create 0.45 2349852
|
||||
|
||||
variable prefactor equal ramp(1.0,20.0)
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all temp/rescale 100 0.45 0.45 0.02 1.0
|
||||
fix 3 all adapt 1 pair soft a * * v_prefactor
|
||||
fix 4 all enforce2d
|
||||
|
||||
thermo 50
|
||||
run 500
|
||||
Generated 0 of 6 mixed pair_coeff terms from geometric mixing rule
|
||||
WARNING: Communication cutoff 1.42246 is shorter than a bond length based estimate of 1.425. This may lead to errors. (../comm.cpp:730)
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 5 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 1.42246
|
||||
ghost atom cutoff = 1.42246
|
||||
binsize = 0.71123, bins = 51 51 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair soft, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/2d
|
||||
bin: standard
|
||||
WARNING: Communication cutoff 1.42246 is shorter than a bond length based estimate of 1.425. This may lead to errors. (../comm.cpp:730)
|
||||
Per MPI rank memory allocation (min/avg/max) = 4.126 | 4.126 | 4.127 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0.45 0.40003481 2.2200223e-06 0.84966203 0.78952518
|
||||
50 0.54981866 0.93548899 0.068440043 1.5532895 1.9232786
|
||||
100 0.45 0.99659327 0.079228519 1.5254468 3.2135679
|
||||
150 0.86965411 0.90456016 0.07493355 1.8484231 4.3821925
|
||||
200 0.45 1.01454 0.10663502 1.5708 4.7598476
|
||||
250 0.79636561 0.82567712 0.12105337 1.7424325 5.4983899
|
||||
300 0.45 0.86475538 0.11819875 1.4325791 5.8554758
|
||||
350 0.72135464 0.70693069 0.10912636 1.5368106 6.0388247
|
||||
400 0.45 0.75067331 0.14165013 1.3419484 6.3840708
|
||||
450 0.64839221 0.62402486 0.14173679 1.4136135 6.4791009
|
||||
500 0.45 0.66669513 0.13695201 1.2532721 6.807146
|
||||
Loop time of 0.0138659 on 4 procs for 500 steps with 1200 atoms
|
||||
|
||||
Performance: 15577811.312 tau/day, 36059.748 timesteps/s, 43.272 Matom-step/s
|
||||
99.9% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.0053896 | 0.0057144 | 0.0060899 | 0.4 | 41.21
|
||||
Bond | 0.00020074 | 0.00021422 | 0.00022291 | 0.0 | 1.54
|
||||
Neigh | 0.0025301 | 0.0025401 | 0.0025501 | 0.0 | 18.32
|
||||
Comm | 0.0031194 | 0.0035074 | 0.0038196 | 0.4 | 25.30
|
||||
Output | 6.4137e-05 | 6.7743e-05 | 7.7909e-05 | 0.0 | 0.49
|
||||
Modify | 0.0013391 | 0.0013582 | 0.0013972 | 0.1 | 9.80
|
||||
Other | | 0.0004638 | | | 3.34
|
||||
|
||||
Nlocal: 300 ave 304 max 292 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 2 1
|
||||
Nghost: 103.5 ave 108 max 98 min
|
||||
Histogram: 1 0 0 1 0 0 0 0 0 2
|
||||
Neighs: 773.5 ave 792 max 735 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 2 1
|
||||
|
||||
Total # of neighbors = 3094
|
||||
Ave neighs/atom = 2.5783333
|
||||
Ave special neighs/atom = 0.5
|
||||
Neighbor list builds = 52
|
||||
Dangerous builds = 0
|
||||
|
||||
unfix 3
|
||||
|
||||
# Main run
|
||||
|
||||
pair_style lj/cut 2.5
|
||||
|
||||
# solvent/head - full-size and long-range
|
||||
|
||||
pair_coeff 1 1 1.0 1.0 2.5
|
||||
pair_coeff 2 2 1.0 1.0 2.5
|
||||
pair_coeff 1 2 1.0 1.0 2.5
|
||||
|
||||
# tail/tail - size-averaged and long-range
|
||||
|
||||
pair_coeff 3 3 1.0 0.75 2.5
|
||||
pair_coeff 4 4 1.0 0.50 2.5
|
||||
pair_coeff 3 4 1.0 0.67 2.5
|
||||
|
||||
# solvent/tail - full-size and repulsive
|
||||
|
||||
pair_coeff 1 3 1.0 1.0 1.12246
|
||||
pair_coeff 1 4 1.0 1.0 1.12246
|
||||
|
||||
# head/tail - size-averaged and repulsive
|
||||
|
||||
pair_coeff 2 3 1.0 0.88 1.12246
|
||||
pair_coeff 2 4 1.0 0.75 1.12246
|
||||
|
||||
thermo 50
|
||||
|
||||
#dump 1 all atom 2000 dump.micelle
|
||||
|
||||
#dump 2 all image 2000 image.*.jpg type type zoom 1.6
|
||||
#dump_modify 2 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
|
||||
|
||||
#dump 3 all movie 2000 movie.mpg type type zoom 1.6
|
||||
#dump_modify 3 pad 5 adiam 1 0.5 adiam 2 1.5 adiam 3 1.0 adiam 4 0.75
|
||||
|
||||
reset_timestep 0
|
||||
group solvent molecule 0
|
||||
750 atoms in group solvent
|
||||
group solute subtract all solvent
|
||||
450 atoms in group solute
|
||||
unfix 1
|
||||
unfix 2
|
||||
unfix 4
|
||||
fix 1 solvent nve
|
||||
fix 2 solvent temp/rescale 100 0.45 0.45 0.02 1.0
|
||||
fix 5 solute rigid molecule langevin 0.45 0.45 0.5 112211
|
||||
150 rigid bodies with 450 atoms
|
||||
fix 4 all enforce2d
|
||||
run 500
|
||||
Generated 0 of 6 mixed pair_coeff terms from geometric mixing rule
|
||||
Neighbor list info ...
|
||||
update: every = 1 steps, delay = 5 steps, check = yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2.8
|
||||
ghost atom cutoff = 2.8
|
||||
binsize = 1.4, bins = 26 26 1
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair lj/cut, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/2d
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 5.375 | 5.375 | 5.375 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0.45318168 -1.3753652 0.13695201 -0.8705807 1.975423
|
||||
50 0.77344732 -1.6944083 0.13695201 -0.92967487 0.58657109
|
||||
100 0.53530681 -1.7006195 0.13695201 -1.1291768 0.11219772
|
||||
150 0.60820175 -1.8071581 0.13695201 -1.176549 1.5161796
|
||||
200 0.49410558 -1.7945459 0.13695201 -1.2565449 4.0469262
|
||||
250 0.52460847 -1.8528672 0.13695201 -1.290108 2.9929445
|
||||
300 0.46596803 -1.8680499 0.13695201 -1.3528872 2.7958851
|
||||
350 0.48831812 -1.8723486 0.13695201 -1.3390451 -4.5106818
|
||||
400 0.46798432 -1.9008529 0.13695201 -1.3840536 -4.3096566
|
||||
450 0.46000658 -1.9081144 0.13695201 -1.3977904 3.3360611
|
||||
500 0.45822409 -1.9077531 0.13695201 -1.3988759 0.45428738
|
||||
Loop time of 0.0381773 on 4 procs for 500 steps with 1200 atoms
|
||||
|
||||
Performance: 5657810.772 tau/day, 13096.784 timesteps/s, 15.716 Matom-step/s
|
||||
99.6% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.0059651 | 0.0062314 | 0.0066404 | 0.3 | 16.32
|
||||
Bond | 0.00021057 | 0.00022477 | 0.0002333 | 0.0 | 0.59
|
||||
Neigh | 0.0041424 | 0.0041487 | 0.0041512 | 0.0 | 10.87
|
||||
Comm | 0.004264 | 0.0047244 | 0.0050297 | 0.4 | 12.37
|
||||
Output | 8.2396e-05 | 8.6559e-05 | 9.6749e-05 | 0.0 | 0.23
|
||||
Modify | 0.021833 | 0.021946 | 0.022094 | 0.1 | 57.48
|
||||
Other | | 0.0008157 | | | 2.14
|
||||
|
||||
Nlocal: 300 ave 303 max 296 min
|
||||
Histogram: 1 0 0 0 1 0 0 0 1 1
|
||||
Nghost: 216.25 ave 219 max 214 min
|
||||
Histogram: 1 0 1 0 0 0 1 0 0 1
|
||||
Neighs: 2189.75 ave 2205 max 2173 min
|
||||
Histogram: 1 0 0 0 1 0 1 0 0 1
|
||||
|
||||
Total # of neighbors = 8759
|
||||
Ave neighs/atom = 7.2991667
|
||||
Ave special neighs/atom = 0.5
|
||||
Neighbor list builds = 46
|
||||
Dangerous builds = 2
|
||||
unfix 2
|
||||
unfix 4
|
||||
unfix 5
|
||||
fix 5 solute rigid/small molecule
|
||||
create bodies CPU = 0.000 seconds
|
||||
150 rigid bodies with 450 atoms
|
||||
1.3043524 = max distance from body owner to body atom
|
||||
fix 4 all enforce2d
|
||||
run 500
|
||||
Generated 0 of 6 mixed pair_coeff terms from geometric mixing rule
|
||||
Per MPI rank memory allocation (min/avg/max) = 9.233 | 9.233 | 9.234 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
500 0.45822409 -1.9077531 0.13695201 -1.3988759 2.4509752
|
||||
550 0.46736204 -1.9141964 0.13695201 -1.3979022 2.1695662
|
||||
600 0.47872194 -1.9232781 0.13695201 -1.3977635 2.0058379
|
||||
650 0.47491575 -1.9224109 0.13695201 -1.3999857 2.0637789
|
||||
700 0.44714331 -1.8990682 0.13695201 -1.3991848 2.4863082
|
||||
750 0.49089274 -1.9231004 0.13695201 -1.3877071 2.123147
|
||||
800 0.4753839 -1.8959698 0.13695201 -1.3731645 2.3030481
|
||||
850 0.46870816 -1.8972225 0.13695201 -1.3798357 2.2464703
|
||||
900 0.49610454 -1.9070748 0.13695201 -1.3674513 2.2196388
|
||||
950 0.4773035 -1.8925765 0.13695201 -1.3682132 2.3534786
|
||||
1000 0.50413702 -1.9292393 0.13695201 -1.383096 2.1630987
|
||||
Loop time of 0.0236819 on 4 procs for 500 steps with 1200 atoms
|
||||
|
||||
Performance: 9120883.727 tau/day, 21113.157 timesteps/s, 25.336 Matom-step/s
|
||||
99.9% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.0058656 | 0.0059718 | 0.0061355 | 0.1 | 25.22
|
||||
Bond | 0.0002083 | 0.00022447 | 0.00023485 | 0.0 | 0.95
|
||||
Neigh | 0.0035477 | 0.0035644 | 0.0035824 | 0.0 | 15.05
|
||||
Comm | 0.0041037 | 0.0042227 | 0.0043024 | 0.1 | 17.83
|
||||
Output | 7.4355e-05 | 7.8273e-05 | 8.7777e-05 | 0.0 | 0.33
|
||||
Modify | 0.008976 | 0.0090549 | 0.0091663 | 0.1 | 38.24
|
||||
Other | | 0.0005654 | | | 2.39
|
||||
|
||||
Nlocal: 300 ave 306 max 295 min
|
||||
Histogram: 1 0 1 0 0 1 0 0 0 1
|
||||
Nghost: 221 ave 226 max 217 min
|
||||
Histogram: 1 0 0 1 1 0 0 0 0 1
|
||||
Neighs: 2163.5 ave 2271 max 2100 min
|
||||
Histogram: 1 1 0 1 0 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 8654
|
||||
Ave neighs/atom = 7.2116667
|
||||
Ave special neighs/atom = 0.5
|
||||
Neighbor list builds = 39
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:00
|
||||
@ -33,6 +33,7 @@
|
||||
|
||||
//#define ASYNC_DEVICE_COPY
|
||||
|
||||
#if 0
|
||||
#if !defined(USE_OPENCL) && !defined(USE_HIP)
|
||||
// temporary workaround for int2 also defined in cufft
|
||||
#ifdef int2
|
||||
@ -40,6 +41,7 @@
|
||||
#endif
|
||||
#include "cufft.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_AL {
|
||||
|
||||
@ -313,10 +315,11 @@ class BaseAmoeba {
|
||||
virtual int fphi_mpole();
|
||||
virtual int polar_real(const int eflag, const int vflag) = 0;
|
||||
|
||||
|
||||
#if 0
|
||||
#if !defined(USE_OPENCL) && !defined(USE_HIP)
|
||||
cufftHandle plan;
|
||||
#endif
|
||||
#endif
|
||||
bool fft_plan_created;
|
||||
};
|
||||
|
||||
|
||||
@ -1,5 +1,105 @@
|
||||
# CHANGELOG
|
||||
|
||||
## [4.3.00](https://github.com/kokkos/kokkos/tree/4.3.00) (2024-03-19)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.01...4.3.00)
|
||||
|
||||
### Features:
|
||||
* Add `Experimental::sort_by_key(exec, keys, values)` algorithm [\#6801](https://github.com/kokkos/kokkos/pull/6801)
|
||||
|
||||
### Backend and Architecture Enhancements:
|
||||
|
||||
#### CUDA:
|
||||
* Experimental multi-GPU support (from the same process) [\#6782](https://github.com/kokkos/kokkos/pull/6782)
|
||||
* Link against CUDA libraries even with KOKKOS_ENABLE_COMPILE_AS_CMAKE_LANGUAGE [\#6701](https://github.com/kokkos/kokkos/pull/6701)
|
||||
* Don't use the compiler launcher script if the CMake compile language is CUDA. [\#6704](https://github.com/kokkos/kokkos/pull/6704)
|
||||
* nvcc(wrapper): adding "long" and "short" versions for all flags [\#6615](https://github.com/kokkos/kokkos/pull/6615)
|
||||
|
||||
#### HIP:
|
||||
* Fix compilation when using amdclang (with ROCm >= 5.7) and RDC [\#6857](https://github.com/kokkos/kokkos/pull/6857)
|
||||
* Use rocthrust for sorting, when available [\#6793](https://github.com/kokkos/kokkos/pull/6793)
|
||||
|
||||
#### SYCL:
|
||||
* We only support OneAPI SYCL implementation: add check during initialization
|
||||
* Error out on initialization if the backend is different from `ext_oneapi_*` [\#6784](https://github.com/kokkos/kokkos/pull/6784)
|
||||
* Filter GPU devices for `ext_oneapi_*` GPU devices [\#6758](https://github.com/kokkos/kokkos/pull/6758)
|
||||
* Performance Improvements
|
||||
* Avoid unnecessary zero-memset of the scratch flags in SYCL [\#6739](https://github.com/kokkos/kokkos/pull/6739)
|
||||
* Use host-pinned memory to copy reduction/scan result [\#6500](https://github.com/kokkos/kokkos/pull/6500)
|
||||
* Address deprecations after oneAPI 2023.2.0 [\#6577](https://github.com/kokkos/kokkos/pull/6577)
|
||||
* Make sure to call find_dependency for oneDPL if necessary [\#6870](https://github.com/kokkos/kokkos/pull/6870)
|
||||
|
||||
#### OpenMPTarget:
|
||||
* Use LLVM extensions for dynamic shared memory [\#6380](https://github.com/kokkos/kokkos/pull/6380)
|
||||
* Guard scratch memory usage in ParallelReduce [\#6585](https://github.com/kokkos/kokkos/pull/6585)
|
||||
* Update linker flags for Intel GPUs [\#6735](https://github.com/kokkos/kokkos/pull/6735)
|
||||
* Improve handling of printf on Intel GPUs [\#6652](https://github.com/kokkos/kokkos/pull/6652)
|
||||
|
||||
#### OpenACC:
|
||||
* Add atomics support [\#6446](https://github.com/kokkos/kokkos/pull/6446)
|
||||
* Make the OpenACC backend asynchronous [\#6772](https://github.com/kokkos/kokkos/pull/6772)
|
||||
|
||||
#### Threads:
|
||||
* Add missing broadcast to TeamThreadRange parallel_scan [\#6601](https://github.com/kokkos/kokkos/pull/6601)
|
||||
|
||||
#### OpenMP:
|
||||
* Improve performance of view initializations and filling with zeros [\#6573](https://github.com/kokkos/kokkos/pull/6573)
|
||||
|
||||
### General Enhancements
|
||||
|
||||
* Improve performance of random number generation when using a normal distribution on GPUs [\#6556](https://github.com/kokkos/kokkos/pull/6556)
|
||||
* Allocate temporary view with the user-provided execution space instance and do not initialize in `unique` algorithm [\#6598](https://github.com/kokkos/kokkos/pull/6598)
|
||||
* Add deduction guide for `Kokkos::Array` [\#6373](https://github.com/kokkos/kokkos/pull/6373)
|
||||
* Provide new public headers `<Kokkos_Clamp.hpp>` and `<Kokkos_MinMax.hpp>` [\#6687](https://github.com/kokkos/kokkos/pull/6687)
|
||||
* Fix/improvement to `remove_if` parallel algorithm: use the provided execution space instance for temporary allocations and drop unnecessary initialization + avoid evaluating the predicate twice during final pass [\#6747](https://github.com/kokkos/kokkos/pull/6747)
|
||||
* Add runtime function to query the number of devices and make device ID consistent with `KOKKOS_VISIBLE_DEVICES` [\#6713](https://github.com/kokkos/kokkos/pull/6713)
|
||||
* simd: support `vector_aligned_tag` [\#6243](https://github.com/kokkos/kokkos/pull/6243)
|
||||
* Avoid unnecessary allocation when default constructing Bitset [\#6524](https://github.com/kokkos/kokkos/pull/6524)
|
||||
* Fix constness for views in std algorithms [\#6813](https://github.com/kokkos/kokkos/pull/6813)
|
||||
* Improve error message on unsafe implicit conversion in MDRangePolicy [\#6855](https://github.com/kokkos/kokkos/pull/6855)
|
||||
* CTAD (deduction guides) for RangePolicy [\#6850](https://github.com/kokkos/kokkos/pull/6850)
|
||||
* CTAD (deduction guides) for MDRangePolicy [\#5516](https://github.com/kokkos/kokkos/pull/5516)
|
||||
|
||||
### Build System Changes
|
||||
* Require `Kokkos_ENABLE_ATOMICS_BYPASS` option to bypass atomic operation for Serial backend only builds [\#6692](https://github.com/kokkos/kokkos/pull/6692)
|
||||
* Add support for RISCV and the Milk-V's Pioneer [\#6773](https://github.com/kokkos/kokkos/pull/6773)
|
||||
* Add C++26 standard to CMake setup [\#6733](https://github.com/kokkos/kokkos/pull/6733)
|
||||
* Fix Makefile when using gnu_generate_makefile.sh and make >= 4.3 [\#6606](https://github.com/kokkos/kokkos/pull/6606)
|
||||
* Cuda: Fix configuring with CMake >= 3.28.4 - temporary fallback to internal CudaToolkit.cmake [\#6898](https://github.com/kokkos/kokkos/pull/6898)
|
||||
|
||||
### Incompatibilities (i.e. breaking changes)
|
||||
* Remove all `DEPRECATED_CODE_3` option and all code that was guarded by it [\#6523](https://github.com/kokkos/kokkos/pull/6523)
|
||||
* Drop guards to accommodate external code defining `KOKKOS_ASSERT` [\#6665](https://github.com/kokkos/kokkos/pull/6665)
|
||||
* `Profiling::ProfilingSection(std::string)` constructor marked explicit and nodiscard [\#6690](https://github.com/kokkos/kokkos/pull/6690)
|
||||
* Add bound check preconditions for `RangePolicy` and `MDRangePolicy` [\#6617](https://github.com/kokkos/kokkos/pull/6617) [\#6726](https://github.com/kokkos/kokkos/pull/6726)
|
||||
* Add checks for unsafe implicit conversions in RangePolicy [\#6754](https://github.com/kokkos/kokkos/pull/6754)
|
||||
* Remove Kokkos::[b]half_t volatile overloads [\#6579](https://github.com/kokkos/kokkos/pull/6579)
|
||||
* Remove KOKKOS_IMPL_DO_NOT_USE_PRINTF [\#6593](https://github.com/kokkos/kokkos/pull/6593)
|
||||
* Check matching static extents in View constructor [\#5190](https://github.com/kokkos/kokkos/pull/5190)
|
||||
* Tools(profiling): fix typo Kokkos_Tools_Optim[i]zationGoal [\#6642](https://github.com/kokkos/kokkos/pull/6642)
|
||||
* Remove variadic range policy constructor (disallow passing multiple trailing chunk size arguments) [\#6845](https://github.com/kokkos/kokkos/pull/6845)
|
||||
* Improve message on view out of bounds access and always abort [\#6861](https://github.com/kokkos/kokkos/pull/6861)
|
||||
* Drop `KOKKOS_ENABLE_INTEL_MM_ALLOC` macro [\#6797](https://github.com/kokkos/kokkos/pull/6797)
|
||||
* Remove `Kokkos::Experimental::LogicalMemorySpace` (without going through deprecation) [\#6557](https://github.com/kokkos/kokkos/pull/6557)
|
||||
* Remove `Experimental::HBWSpace` and support for linking against memkind [\#6791](https://github.com/kokkos/kokkos/pull/6791)
|
||||
* Drop librt TPL and associated `KOKKOS_ENABLE_LIBRT` macro [\#6798](https://github.com/kokkos/kokkos/pull/6798)
|
||||
* Drop support for old CPU architectures (`ARCH_BGQ`, `ARCH_POWER7`, `ARCH_WSM` and associated `ARCH_SSE4` macro) [\#6806](https://github.com/kokkos/kokkos/pull/6806)
|
||||
* Drop support for deprecated command-line arguments and environment variables [\#6744](https://github.com/kokkos/kokkos/pull/6744)
|
||||
|
||||
### Deprecations
|
||||
* Provide kokkos_swap as part of Core and deprecate Experimental::swap in Algorithms [\#6697](https://github.com/kokkos/kokkos/pull/6697)
|
||||
* Deprecate {Cuda,HIP}::detect_device_count() and Cuda::[detect_]device_arch() [\#6710](https://github.com/kokkos/kokkos/pull/6710)
|
||||
* Deprecate `ExecutionSpace::in_parallel()` [\#6582](https://github.com/kokkos/kokkos/pull/6582)
|
||||
|
||||
### Bug Fixes
|
||||
* Fix team-level MDRange reductions: [\#6511](https://github.com/kokkos/kokkos/pull/6511)
|
||||
* Fix CUDA and SYCL small value type (16-bit) team reductions [\#5334](https://github.com/kokkos/kokkos/pull/5334)
|
||||
* Enable `{transform_}exclusive_scan` in place [\#6667](https://github.com/kokkos/kokkos/pull/6667)
|
||||
* `fill_random` overloads that do not take an execution space instance argument should fence [\#6658](https://github.com/kokkos/kokkos/pull/6658)
|
||||
* HIP,Cuda,OpenMPTarget: Fixup use provided execution space when copying host inaccessible reduction result [\#6777](https://github.com/kokkos/kokkos/pull/6777)
|
||||
* Fix typo in `cuda_func_set_attribute[s]_wrapper` preventing proper setting of desired occupancy [\#6786](https://github.com/kokkos/kokkos/pull/6786)
|
||||
* Avoid undefined behavior due to conversion between signed and unsigned integers in shift_{right, left}_team_impl [\#6821](https://github.com/kokkos/kokkos/pull/6821)
|
||||
* Fix a bug in Makefile.kokkos when using AMD GPU architectures as `AMD_GFXYYY` [\#6892](https://github.com/kokkos/kokkos/pull/6892)
|
||||
|
||||
## [4.2.01](https://github.com/kokkos/kokkos/tree/4.2.01) (2023-12-07)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.2.00...4.2.01)
|
||||
|
||||
@ -999,95 +1099,95 @@
|
||||
- Major update for OpenMPTarget: many capabilities now work. For details contact us.
|
||||
- Added DPC++/SYCL backend: primary capabilities are working.
|
||||
- Added Kokkos Graph API analogous to CUDA Graphs.
|
||||
- Added parallel_scan support with TeamThreadRange [\#3536](https://github.com/kokkos/kokkos/pull/#3536)
|
||||
- Added Logical Memory Spaces [\#3546](https://github.com/kokkos/kokkos/pull/#3546)
|
||||
- Added initial half precision support [\#3439](https://github.com/kokkos/kokkos/pull/#3439)
|
||||
- Experimental feature: control cuda occupancy [\#3379](https://github.com/kokkos/kokkos/pull/#3379)
|
||||
- Added parallel_scan support with TeamThreadRange [\#3536](https://github.com/kokkos/kokkos/pull/3536)
|
||||
- Added Logical Memory Spaces [\#3546](https://github.com/kokkos/kokkos/pull/3546)
|
||||
- Added initial half precision support [\#3439](https://github.com/kokkos/kokkos/pull/3439)
|
||||
- Experimental feature: control cuda occupancy [\#3379](https://github.com/kokkos/kokkos/pull/3379)
|
||||
|
||||
**Implemented enhancements Backends and Archs:**
|
||||
- Add a64fx and fujitsu Compiler support [\#3614](https://github.com/kokkos/kokkos/pull/#3614)
|
||||
- Adding support for AMD gfx908 archictecture [\#3375](https://github.com/kokkos/kokkos/pull/#3375)
|
||||
- SYCL parallel\_for MDRangePolicy [\#3583](https://github.com/kokkos/kokkos/pull/#3583)
|
||||
- SYCL add parallel\_scan [\#3577](https://github.com/kokkos/kokkos/pull/#3577)
|
||||
- SYCL custom reductions [\#3544](https://github.com/kokkos/kokkos/pull/#3544)
|
||||
- SYCL Enable container unit tests [\#3550](https://github.com/kokkos/kokkos/pull/#3550)
|
||||
- SYCL feature level 5 [\#3480](https://github.com/kokkos/kokkos/pull/#3480)
|
||||
- SYCL Feature level 4 (parallel\_for) [\#3474](https://github.com/kokkos/kokkos/pull/#3474)
|
||||
- SYCL feature level 3 [\#3451](https://github.com/kokkos/kokkos/pull/#3451)
|
||||
- SYCL feature level 2 [\#3447](https://github.com/kokkos/kokkos/pull/#3447)
|
||||
- OpenMPTarget: Hierarchial reduction for + operator on scalars [\#3504](https://github.com/kokkos/kokkos/pull/#3504)
|
||||
- OpenMPTarget hierarchical [\#3411](https://github.com/kokkos/kokkos/pull/#3411)
|
||||
- HIP Add Impl::atomic\_[store,load] [\#3440](https://github.com/kokkos/kokkos/pull/#3440)
|
||||
- HIP enable global lock arrays [\#3418](https://github.com/kokkos/kokkos/pull/#3418)
|
||||
- HIP Implement multiple occupancy paths for various HIP kernel launchers [\#3366](https://github.com/kokkos/kokkos/pull/#3366)
|
||||
- Add a64fx and fujitsu Compiler support [\#3614](https://github.com/kokkos/kokkos/pull/3614)
|
||||
- Adding support for AMD gfx908 architecture [\#3375](https://github.com/kokkos/kokkos/pull/3375)
|
||||
- SYCL parallel\_for MDRangePolicy [\#3583](https://github.com/kokkos/kokkos/pull/3583)
|
||||
- SYCL add parallel\_scan [\#3577](https://github.com/kokkos/kokkos/pull/3577)
|
||||
- SYCL custom reductions [\#3544](https://github.com/kokkos/kokkos/pull/3544)
|
||||
- SYCL Enable container unit tests [\#3550](https://github.com/kokkos/kokkos/pull/3550)
|
||||
- SYCL feature level 5 [\#3480](https://github.com/kokkos/kokkos/pull/3480)
|
||||
- SYCL Feature level 4 (parallel\_for) [\#3474](https://github.com/kokkos/kokkos/pull/3474)
|
||||
- SYCL feature level 3 [\#3451](https://github.com/kokkos/kokkos/pull/3451)
|
||||
- SYCL feature level 2 [\#3447](https://github.com/kokkos/kokkos/pull/3447)
|
||||
- OpenMPTarget: Hierarchical reduction for + operator on scalars [\#3504](https://github.com/kokkos/kokkos/pull/3504)
|
||||
- OpenMPTarget hierarchical [\#3411](https://github.com/kokkos/kokkos/pull/3411)
|
||||
- HIP Add Impl::atomic\_[store,load] [\#3440](https://github.com/kokkos/kokkos/pull/3440)
|
||||
- HIP enable global lock arrays [\#3418](https://github.com/kokkos/kokkos/pull/3418)
|
||||
- HIP Implement multiple occupancy paths for various HIP kernel launchers [\#3366](https://github.com/kokkos/kokkos/pull/3366)
|
||||
|
||||
**Implemented enhancements Policies:**
|
||||
- MDRangePolicy: Let it be semiregular [\#3494](https://github.com/kokkos/kokkos/pull/#3494)
|
||||
- MDRangePolicy: Check narrowing conversion in construction [\#3527](https://github.com/kokkos/kokkos/pull/#3527)
|
||||
- MDRangePolicy: CombinedReducers support [\#3395](https://github.com/kokkos/kokkos/pull/#3395)
|
||||
- Kokkos Graph: Interface and Default Implementation [\#3362](https://github.com/kokkos/kokkos/pull/#3362)
|
||||
- Kokkos Graph: add Cuda Graph implementation [\#3369](https://github.com/kokkos/kokkos/pull/#3369)
|
||||
- TeamPolicy: implemented autotuning of team sizes and vector lengths [\#3206](https://github.com/kokkos/kokkos/pull/#3206)
|
||||
- RangePolicy: Initialize all data members in default constructor [\#3509](https://github.com/kokkos/kokkos/pull/#3509)
|
||||
- MDRangePolicy: Let it be semiregular [\#3494](https://github.com/kokkos/kokkos/pull/3494)
|
||||
- MDRangePolicy: Check narrowing conversion in construction [\#3527](https://github.com/kokkos/kokkos/pull/3527)
|
||||
- MDRangePolicy: CombinedReducers support [\#3395](https://github.com/kokkos/kokkos/pull/3395)
|
||||
- Kokkos Graph: Interface and Default Implementation [\#3362](https://github.com/kokkos/kokkos/pull/3362)
|
||||
- Kokkos Graph: add Cuda Graph implementation [\#3369](https://github.com/kokkos/kokkos/pull/3369)
|
||||
- TeamPolicy: implemented autotuning of team sizes and vector lengths [\#3206](https://github.com/kokkos/kokkos/pull/3206)
|
||||
- RangePolicy: Initialize all data members in default constructor [\#3509](https://github.com/kokkos/kokkos/pull/3509)
|
||||
|
||||
**Implemented enhancements BuildSystem:**
|
||||
- Auto-generate core test files for all backends [\#3488](https://github.com/kokkos/kokkos/pull/#3488)
|
||||
- Avoid rewriting test files when calling cmake [\#3548](https://github.com/kokkos/kokkos/pull/#3548)
|
||||
- RULE\_LAUNCH\_COMPILE and RULE\_LAUNCH\_LINK system for nvcc\_wrapper [\#3136](https://github.com/kokkos/kokkos/pull/#3136)
|
||||
- Adding -include as a known argument to nvcc\_wrapper [\#3434](https://github.com/kokkos/kokkos/pull/#3434)
|
||||
- Install hpcbind script [\#3402](https://github.com/kokkos/kokkos/pull/#3402)
|
||||
- cmake/kokkos\_tribits.cmake: add parsing for args [\#3457](https://github.com/kokkos/kokkos/pull/#3457)
|
||||
- Auto-generate core test files for all backends [\#3488](https://github.com/kokkos/kokkos/pull/3488)
|
||||
- Avoid rewriting test files when calling cmake [\#3548](https://github.com/kokkos/kokkos/pull/3548)
|
||||
- RULE\_LAUNCH\_COMPILE and RULE\_LAUNCH\_LINK system for nvcc\_wrapper [\#3136](https://github.com/kokkos/kokkos/pull/3136)
|
||||
- Adding -include as a known argument to nvcc\_wrapper [\#3434](https://github.com/kokkos/kokkos/pull/3434)
|
||||
- Install hpcbind script [\#3402](https://github.com/kokkos/kokkos/pull/3402)
|
||||
- cmake/kokkos\_tribits.cmake: add parsing for args [\#3457](https://github.com/kokkos/kokkos/pull/3457)
|
||||
|
||||
**Implemented enhancements Tools:**
|
||||
- Changed namespacing of Kokkos::Tools::Impl::Impl::tune\_policy [\#3455](https://github.com/kokkos/kokkos/pull/#3455)
|
||||
- Delegate to an impl allocate/deallocate method to allow specifying a SpaceHandle for MemorySpaces [\#3530](https://github.com/kokkos/kokkos/pull/#3530)
|
||||
- Use the Kokkos Profiling interface rather than the Impl interface [\#3518](https://github.com/kokkos/kokkos/pull/#3518)
|
||||
- Runtime option for tuning [\#3459](https://github.com/kokkos/kokkos/pull/#3459)
|
||||
- Dual View Tool Events [\#3326](https://github.com/kokkos/kokkos/pull/#3326)
|
||||
- Changed namespacing of Kokkos::Tools::Impl::Impl::tune\_policy [\#3455](https://github.com/kokkos/kokkos/pull/3455)
|
||||
- Delegate to an impl allocate/deallocate method to allow specifying a SpaceHandle for MemorySpaces [\#3530](https://github.com/kokkos/kokkos/pull/3530)
|
||||
- Use the Kokkos Profiling interface rather than the Impl interface [\#3518](https://github.com/kokkos/kokkos/pull/3518)
|
||||
- Runtime option for tuning [\#3459](https://github.com/kokkos/kokkos/pull/3459)
|
||||
- Dual View Tool Events [\#3326](https://github.com/kokkos/kokkos/pull/3326)
|
||||
|
||||
**Implemented enhancements Other:**
|
||||
- Abort on errors instead of just printing [\#3528](https://github.com/kokkos/kokkos/pull/#3528)
|
||||
- Enable C++14 macros unconditionally [\#3449](https://github.com/kokkos/kokkos/pull/#3449)
|
||||
- Make ViewMapping trivially copyable [\#3436](https://github.com/kokkos/kokkos/pull/#3436)
|
||||
- Rename struct ViewMapping to class [\#3435](https://github.com/kokkos/kokkos/pull/#3435)
|
||||
- Replace enums in Kokkos\_ViewMapping.hpp (removes -Wextra) [\#3422](https://github.com/kokkos/kokkos/pull/#3422)
|
||||
- Use bool for enums representing bools [\#3416](https://github.com/kokkos/kokkos/pull/#3416)
|
||||
- Fence active instead of default execution space instances [\#3388](https://github.com/kokkos/kokkos/pull/#3388)
|
||||
- Refactor parallel\_reduce fence usage [\#3359](https://github.com/kokkos/kokkos/pull/#3359)
|
||||
- Moved Space EBO helpers to Kokkos\_EBO [\#3357](https://github.com/kokkos/kokkos/pull/#3357)
|
||||
- Add remove\_cvref type trait [\#3340](https://github.com/kokkos/kokkos/pull/#3340)
|
||||
- Adding identity type traits and update definition of identity\_t alias [\#3339](https://github.com/kokkos/kokkos/pull/#3339)
|
||||
- Add is\_specialization\_of type trait [\#3338](https://github.com/kokkos/kokkos/pull/#3338)
|
||||
- Make ScratchMemorySpace semi-regular [\#3309](https://github.com/kokkos/kokkos/pull/#3309)
|
||||
- Optimize min/max atomics with early exit on no-op case [\#3265](https://github.com/kokkos/kokkos/pull/#3265)
|
||||
- Refactor Backend Development [\#2941](https://github.com/kokkos/kokkos/pull/#2941)
|
||||
- Abort on errors instead of just printing [\#3528](https://github.com/kokkos/kokkos/pull/3528)
|
||||
- Enable C++14 macros unconditionally [\#3449](https://github.com/kokkos/kokkos/pull/3449)
|
||||
- Make ViewMapping trivially copyable [\#3436](https://github.com/kokkos/kokkos/pull/3436)
|
||||
- Rename struct ViewMapping to class [\#3435](https://github.com/kokkos/kokkos/pull/3435)
|
||||
- Replace enums in Kokkos\_ViewMapping.hpp (removes -Wextra) [\#3422](https://github.com/kokkos/kokkos/pull/3422)
|
||||
- Use bool for enums representing bools [\#3416](https://github.com/kokkos/kokkos/pull/3416)
|
||||
- Fence active instead of default execution space instances [\#3388](https://github.com/kokkos/kokkos/pull/3388)
|
||||
- Refactor parallel\_reduce fence usage [\#3359](https://github.com/kokkos/kokkos/pull/3359)
|
||||
- Moved Space EBO helpers to Kokkos\_EBO [\#3357](https://github.com/kokkos/kokkos/pull/3357)
|
||||
- Add remove\_cvref type trait [\#3340](https://github.com/kokkos/kokkos/pull/3340)
|
||||
- Adding identity type traits and update definition of identity\_t alias [\#3339](https://github.com/kokkos/kokkos/pull/3339)
|
||||
- Add is\_specialization\_of type trait [\#3338](https://github.com/kokkos/kokkos/pull/3338)
|
||||
- Make ScratchMemorySpace semi-regular [\#3309](https://github.com/kokkos/kokkos/pull/3309)
|
||||
- Optimize min/max atomics with early exit on no-op case [\#3265](https://github.com/kokkos/kokkos/pull/3265)
|
||||
- Refactor Backend Development [\#2941](https://github.com/kokkos/kokkos/pull/2941)
|
||||
|
||||
**Fixed bugs:**
|
||||
- Fixup MDRangePolicy construction from Kokkos arrays [\#3591](https://github.com/kokkos/kokkos/pull/#3591)
|
||||
- Add atomic functions for unsigned long long using gcc built-in [\#3588](https://github.com/kokkos/kokkos/pull/#3588)
|
||||
- Fixup silent pointless comparison with zero in checked\_narrow\_cast (compiler workaround) [\#3566](https://github.com/kokkos/kokkos/pull/#3566)
|
||||
- Fixes for ROCm 3.9 [\#3565](https://github.com/kokkos/kokkos/pull/#3565)
|
||||
- Fix windows build issues which crept in for the CUDA build [\#3532](https://github.com/kokkos/kokkos/pull/#3532)
|
||||
- HIP Fix atomics of large data types and clean up lock arrays [\#3529](https://github.com/kokkos/kokkos/pull/#3529)
|
||||
- Pthreads fix exception resulting from 0 grain size [\#3510](https://github.com/kokkos/kokkos/pull/#3510)
|
||||
- Fixup do not require atomic operation to be default constructible [\#3503](https://github.com/kokkos/kokkos/pull/#3503)
|
||||
- Fix race condition in HIP backend [\#3467](https://github.com/kokkos/kokkos/pull/#3467)
|
||||
- Replace KOKKOS\_DEBUG with KOKKOS\_ENABLE\_DEBUG [\#3458](https://github.com/kokkos/kokkos/pull/#3458)
|
||||
- Fix multi-stream team scratch space definition for HIP [\#3398](https://github.com/kokkos/kokkos/pull/#3398)
|
||||
- HIP fix template deduction [\#3393](https://github.com/kokkos/kokkos/pull/#3393)
|
||||
- Fix compiling with HIP and C++17 [\#3390](https://github.com/kokkos/kokkos/pull/#3390)
|
||||
- Fix sigFPE in HIP blocksize deduction [\#3378](https://github.com/kokkos/kokkos/pull/#3378)
|
||||
- Type alias change: replace CS with CTS to avoid conflicts with NVSHMEM [\#3348](https://github.com/kokkos/kokkos/pull/#3348)
|
||||
- Clang compilation of CUDA backend on Windows [\#3345](https://github.com/kokkos/kokkos/pull/#3345)
|
||||
- Fix HBW support [\#3343](https://github.com/kokkos/kokkos/pull/#3343)
|
||||
- Added missing fences to unique token [\#3260](https://github.com/kokkos/kokkos/pull/#3260)
|
||||
- Fixup MDRangePolicy construction from Kokkos arrays [\#3591](https://github.com/kokkos/kokkos/pull/3591)
|
||||
- Add atomic functions for unsigned long long using gcc built-in [\#3588](https://github.com/kokkos/kokkos/pull/3588)
|
||||
- Fixup silent pointless comparison with zero in checked\_narrow\_cast (compiler workaround) [\#3566](https://github.com/kokkos/kokkos/pull/3566)
|
||||
- Fixes for ROCm 3.9 [\#3565](https://github.com/kokkos/kokkos/pull/3565)
|
||||
- Fix windows build issues which crept in for the CUDA build [\#3532](https://github.com/kokkos/kokkos/pull/3532)
|
||||
- HIP Fix atomics of large data types and clean up lock arrays [\#3529](https://github.com/kokkos/kokkos/pull/3529)
|
||||
- Pthreads fix exception resulting from 0 grain size [\#3510](https://github.com/kokkos/kokkos/pull/3510)
|
||||
- Fixup do not require atomic operation to be default constructible [\#3503](https://github.com/kokkos/kokkos/pull/3503)
|
||||
- Fix race condition in HIP backend [\#3467](https://github.com/kokkos/kokkos/pull/3467)
|
||||
- Replace KOKKOS\_DEBUG with KOKKOS\_ENABLE\_DEBUG [\#3458](https://github.com/kokkos/kokkos/pull/3458)
|
||||
- Fix multi-stream team scratch space definition for HIP [\#3398](https://github.com/kokkos/kokkos/pull/3398)
|
||||
- HIP fix template deduction [\#3393](https://github.com/kokkos/kokkos/pull/3393)
|
||||
- Fix compiling with HIP and C++17 [\#3390](https://github.com/kokkos/kokkos/pull/3390)
|
||||
- Fix sigFPE in HIP blocksize deduction [\#3378](https://github.com/kokkos/kokkos/pull/3378)
|
||||
- Type alias change: replace CS with CTS to avoid conflicts with NVSHMEM [\#3348](https://github.com/kokkos/kokkos/pull/3348)
|
||||
- Clang compilation of CUDA backend on Windows [\#3345](https://github.com/kokkos/kokkos/pull/3345)
|
||||
- Fix HBW support [\#3343](https://github.com/kokkos/kokkos/pull/3343)
|
||||
- Added missing fences to unique token [\#3260](https://github.com/kokkos/kokkos/pull/3260)
|
||||
|
||||
**Incompatibilities:**
|
||||
- Remove unused utilities (forward, move, and expand\_variadic) from Kokkos::Impl [\#3535](https://github.com/kokkos/kokkos/pull/#3535)
|
||||
- Remove unused traits [\#3534](https://github.com/kokkos/kokkos/pull/#3534)
|
||||
- HIP: Remove old HCC code [\#3301](https://github.com/kokkos/kokkos/pull/#3301)
|
||||
- Prepare for deprecation of ViewAllocateWithoutInitializing [\#3264](https://github.com/kokkos/kokkos/pull/#3264)
|
||||
- Remove ROCm backend [\#3148](https://github.com/kokkos/kokkos/pull/#3148)
|
||||
- Remove unused utilities (forward, move, and expand\_variadic) from Kokkos::Impl [\#3535](https://github.com/kokkos/kokkos/pull/3535)
|
||||
- Remove unused traits [\#3534](https://github.com/kokkos/kokkos/pull/3534)
|
||||
- HIP: Remove old HCC code [\#3301](https://github.com/kokkos/kokkos/pull/3301)
|
||||
- Prepare for deprecation of ViewAllocateWithoutInitializing [\#3264](https://github.com/kokkos/kokkos/pull/3264)
|
||||
- Remove ROCm backend [\#3148](https://github.com/kokkos/kokkos/pull/3148)
|
||||
|
||||
## [3.2.01](https://github.com/kokkos/kokkos/tree/3.2.01) (2020-11-17)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.2.00...3.2.01)
|
||||
|
||||
@ -150,8 +150,8 @@ ENDIF()
|
||||
|
||||
|
||||
set(Kokkos_VERSION_MAJOR 4)
|
||||
set(Kokkos_VERSION_MINOR 2)
|
||||
set(Kokkos_VERSION_PATCH 1)
|
||||
set(Kokkos_VERSION_MINOR 3)
|
||||
set(Kokkos_VERSION_PATCH 0)
|
||||
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
|
||||
message(STATUS "Kokkos version: ${Kokkos_VERSION}")
|
||||
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
|
||||
|
||||
@ -11,8 +11,8 @@ CXXFLAGS += $(SHFLAGS)
|
||||
endif
|
||||
|
||||
KOKKOS_VERSION_MAJOR = 4
|
||||
KOKKOS_VERSION_MINOR = 2
|
||||
KOKKOS_VERSION_PATCH = 1
|
||||
KOKKOS_VERSION_MINOR = 3
|
||||
KOKKOS_VERSION_PATCH = 0
|
||||
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
|
||||
|
||||
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
|
||||
@ -22,14 +22,14 @@ KOKKOS_DEVICES ?= "OpenMP"
|
||||
# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR
|
||||
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
|
||||
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
|
||||
# IBM: BGQ,Power7,Power8,Power9
|
||||
# AMD-GPUS: GFX906,GFX908,GFX90A,GFX940,GFX942,GFX1030,GFX1100
|
||||
# IBM: Power8,Power9
|
||||
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100
|
||||
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
|
||||
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
|
||||
KOKKOS_ARCH ?= ""
|
||||
# Options: yes,no
|
||||
KOKKOS_DEBUG ?= "no"
|
||||
# Options: hwloc,librt,experimental_memkind
|
||||
# Options: hwloc
|
||||
KOKKOS_USE_TPLS ?= ""
|
||||
# Options: c++17,c++1z,c++20,c++2a,c++23,c++2b
|
||||
KOKKOS_CXX_STANDARD ?= "c++17"
|
||||
@ -56,7 +56,7 @@ uppercase_internal=$(if $1,$$(subst $(firstword $1),$(call uppercase_internal,$(
|
||||
uppercase=$(eval uppercase_RESULT:=$(call uppercase_internal,$(uppercase_TABLE),$1))$(uppercase_RESULT)
|
||||
# Return a 1 if a string contains a substring and 0 if not
|
||||
# Note the search string should be without '"'
|
||||
# Example: $(call kokkos_has_string,"hwloc,librt",hwloc)
|
||||
# Example: $(call kokkos_has_string,"hwloc,libdl",hwloc)
|
||||
# Will return a 1
|
||||
kokkos_has_string=$(if $(findstring $(call uppercase,$2),$(call uppercase,$1)),1,0)
|
||||
# Returns 1 if the path exists, 0 otherwise
|
||||
@ -73,11 +73,11 @@ KOKKOS_INTERNAL_ENABLE_CXX20 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),
|
||||
KOKKOS_INTERNAL_ENABLE_CXX2A := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2a)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX23 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++23)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX2B := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2b)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX26 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++26)
|
||||
KOKKOS_INTERNAL_ENABLE_CXX2C := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++2c)
|
||||
|
||||
# Check for external libraries.
|
||||
KOKKOS_INTERNAL_USE_HWLOC := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),hwloc)
|
||||
KOKKOS_INTERNAL_USE_LIBRT := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),librt)
|
||||
KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),experimental_memkind)
|
||||
|
||||
# Check for advanced settings.
|
||||
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
|
||||
@ -318,7 +318,6 @@ endif
|
||||
|
||||
# Intel based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_KNC := $(call kokkos_has_string,$(KOKKOS_ARCH),KNC)
|
||||
KOKKOS_INTERNAL_USE_ARCH_WSM := $(call kokkos_has_string,$(KOKKOS_ARCH),WSM)
|
||||
KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB)
|
||||
KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW)
|
||||
KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW)
|
||||
@ -398,11 +397,9 @@ KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc))
|
||||
|
||||
# IBM based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(call kokkos_has_string,$(KOKKOS_ARCH),BGQ)
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power7)
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
|
||||
KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power9)
|
||||
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
|
||||
KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc))
|
||||
|
||||
# AMD based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX)
|
||||
@ -413,22 +410,37 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN3), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Zen)
|
||||
endif
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A))
|
||||
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX906)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX906 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA906)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX908)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX908 := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA908)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX90A)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX90A := $(call kokkos_has_string,$(KOKKOS_ARCH),VEGA90A)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX940 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX940)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX942 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX942)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(or $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100),$(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1030)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1030 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1030)
|
||||
endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1100)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
|
||||
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
|
||||
endif
|
||||
|
||||
# Any AVX?
|
||||
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3))
|
||||
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL))
|
||||
|
||||
# Incompatible flags?
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc)
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc)
|
||||
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
|
||||
@ -573,6 +585,16 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2B), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2B_FLAG)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX23")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX26), 1)
|
||||
#I cannot make CMake add this in a good way - so add it here
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX26_FLAG)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2C), 1)
|
||||
#I cannot make CMake add this in a good way - so add it here
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX2C_FLAG)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CXX26")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
@ -612,27 +634,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HWLOC")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_LIBRT")
|
||||
KOKKOS_LIBS += -lrt
|
||||
KOKKOS_TPL_LIBRARY_NAMES += rt
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
ifneq ($(KOKKOS_CMAKE), yes)
|
||||
ifneq ($(MEMKIND_PATH),)
|
||||
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
|
||||
KOKKOS_LIBDIRS += -L$(MEMKIND_PATH)/lib
|
||||
KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib
|
||||
KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include
|
||||
KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib
|
||||
endif
|
||||
KOKKOS_LIBS += -lmemkind -lnuma
|
||||
KOKKOS_TPL_LIBRARY_NAMES += memkind numa
|
||||
endif
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_HBWSPACE")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_LARGE_MEM_TESTS")
|
||||
endif
|
||||
@ -699,10 +700,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_DISABLE_MALLOC_ASYNC), 0)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC")
|
||||
else
|
||||
@ -827,20 +824,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_SSE42")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
|
||||
KOKKOS_CXXFLAGS += -xSSE4.2
|
||||
KOKKOS_LDFLAGS += -xSSE4.2
|
||||
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
else
|
||||
# Assume that this is really a GNU compiler.
|
||||
KOKKOS_CXXFLAGS += -msse4.2
|
||||
KOKKOS_LDFLAGS += -msse4.2
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
|
||||
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX")
|
||||
|
||||
@ -1249,7 +1232,6 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
|
||||
tmp := $(call kokkos_update_config_header, KOKKOS_FWD_HPP_, "KokkosCore_Config_FwdBackend.tmp", "KokkosCore_Config_FwdBackend.hpp")
|
||||
tmp := $(call kokkos_update_config_header, KOKKOS_SETUP_HPP_, "KokkosCore_Config_SetupBackend.tmp", "KokkosCore_Config_SetupBackend.hpp")
|
||||
tmp := $(call kokkos_update_config_header, KOKKOS_DECLARE_HPP_, "KokkosCore_Config_DeclareBackend.tmp", "KokkosCore_Config_DeclareBackend.hpp")
|
||||
tmp := $(call kokkos_update_config_header, KOKKOS_POST_INCLUDE_HPP_, "KokkosCore_Config_PostInclude.tmp", "KokkosCore_Config_PostInclude.hpp")
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_CUDA.hpp>","KokkosCore_Config_FwdBackend.hpp")
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_CUDA.hpp>","KokkosCore_Config_DeclareBackend.hpp")
|
||||
@ -1289,10 +1271,6 @@ ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0)
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_SERIAL.hpp>","KokkosCore_Config_FwdBackend.hpp")
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_SERIAL.hpp>","KokkosCore_Config_DeclareBackend.hpp")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <fwd/Kokkos_Fwd_HBWSpace.hpp>","KokkosCore_Config_FwdBackend.hpp")
|
||||
tmp := $(call kokkos_append_config_header,"$H""include <decl/Kokkos_Declare_HBWSpace.hpp>","KokkosCore_Config_DeclareBackend.hpp")
|
||||
endif
|
||||
endif
|
||||
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp)
|
||||
@ -1403,11 +1381,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
|
||||
KOKKOS_TPL_LIBRARY_NAMES += hpx
|
||||
endif
|
||||
|
||||
# Don't include Kokkos_HBWSpace.cpp if not using MEMKIND to avoid a link warning.
|
||||
ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC))
|
||||
endif
|
||||
|
||||
# With Cygwin functions such as fdopen and fileno are not defined
|
||||
# when strict ansi is enabled. strict ansi gets enabled with -std=c++14
|
||||
# though. So we hard undefine it here. Not sure if that has any bad side effects
|
||||
@ -1461,6 +1434,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
|
||||
else
|
||||
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENMP */")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENACC), 1)
|
||||
tmp := $(call desul_append_header,"$H""define DESUL_ATOMICS_ENABLE_OPENACC")
|
||||
else
|
||||
tmp := $(call desul_append_header,"/* $H""undef DESUL_ATOMICS_ENABLE_OPENACC */")
|
||||
endif
|
||||
tmp := $(call desul_append_header, "")
|
||||
tmp := $(call desul_append_header, "$H""endif")
|
||||
|
||||
@ -1493,7 +1472,7 @@ include $(KOKKOS_PATH)/Makefile.targets
|
||||
kokkos-clean:
|
||||
rm -f $(KOKKOS_OBJ_LINK) $(DESUL_CONFIG_HEADER) $(DESUL_INTERNAL_CONFIG_TMP) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a KokkosCore_Config_SetupBackend.hpp \
|
||||
KokkosCore_Config_FwdBackend.hpp KokkosCore_Config_DeclareBackend.hpp KokkosCore_Config_DeclareBackend.tmp \
|
||||
KokkosCore_Config_FwdBackend.tmp KokkosCore_Config_PostInclude.hpp KokkosCore_Config_PostInclude.tmp KokkosCore_Config_SetupBackend.tmp
|
||||
KokkosCore_Config_FwdBackend.tmp KokkosCore_Config_SetupBackend.tmp
|
||||
|
||||
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
|
||||
ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
|
||||
|
||||
@ -20,8 +20,6 @@ Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Ta
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
|
||||
Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp
|
||||
Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp
|
||||
Kokkos_HostBarrier.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostBarrier.cpp
|
||||
Kokkos_Profiling.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling.cpp
|
||||
@ -30,8 +28,6 @@ Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
|
||||
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
|
||||
Kokkos_MemorySpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemorySpace.cpp
|
||||
Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp
|
||||
Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp
|
||||
@ -82,8 +78,10 @@ Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
|
||||
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||
Kokkos_Threads_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Instance.cpp
|
||||
# Spin-wait support object, built only when the Threads backend is enabled.
# The recipe must compile the same source file that is listed as the
# prerequisite (Threads/Kokkos_Threads_Spinwait.cpp).
Kokkos_Threads_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_Spinwait.cpp
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
@ -123,6 +121,3 @@ Kokkos_OpenACC_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC
|
||||
Kokkos_OpenACC_SharedAllocationRecord.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenACC/Kokkos_OpenACC_SharedAllocationRecord.cpp
|
||||
endif
|
||||
|
||||
Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
|
||||
|
||||
@ -28,7 +28,7 @@ To start learning about Kokkos:
|
||||
|
||||
- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability.
|
||||
|
||||
For questions find us on Slack: https://kokkosteam.slack.com or open a github issue.
|
||||
For questions find us on Slack: https://kokkosteam.slack.com or open a GitHub issue.
|
||||
|
||||
For non-public questions send an email to: *crtrott(at)sandia.gov*
|
||||
|
||||
@ -48,10 +48,10 @@ Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citati
|
||||
|
||||
# License
|
||||
|
||||
[BSD-3-Clause](https://opensource.org/licenses/BSD-3-Clause)
|
||||
[Apache-2.0 WITH LLVM-exception](https://spdx.org/licenses/LLVM-exception.html)
|
||||
|
||||
Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
the U.S. Government retains certain rights in this software.
|
||||
|
||||
The full license statement used in all headers is available [here](https://kokkos.github.io/kokkos-core-wiki/license.html) or
|
||||
[here](https://github.com/kokkos/kokkos/blob/master/LICENSE).
|
||||
The full license statement used in all headers is available [here](https://kokkos.org/kokkos-core-wiki/license.html) or
|
||||
[here](https://github.com/kokkos/kokkos/blob/develop/LICENSE).
|
||||
|
||||
12
lib/kokkos/SECURITY.md
Normal file
12
lib/kokkos/SECURITY.md
Normal file
@ -0,0 +1,12 @@
|
||||
# Reporting Security Issues
|
||||
|
||||
To report a security issue, please email
|
||||
[lebrungrandt@ornl.gov](mailto:lebrungrandt@ornl.gov)
|
||||
and [crtrott@sandia.gov](mailto:crtrott@sandia.gov)
|
||||
with a description of the issue, the steps you took to create the issue,
|
||||
affected versions, and, if known, mitigations for the issue.
|
||||
|
||||
Our vulnerability management team will respond within 5 working days of your
|
||||
email. If the issue is confirmed as a vulnerability, we will open a
|
||||
Security Advisory and acknowledge your contributions as part of it. This project
|
||||
follows a 90-day disclosure timeline.
|
||||
@ -159,7 +159,6 @@ If you don't specify a CUDA build variant in a `packages.yaml` and you build you
|
||||
> spack install superscience
|
||||
````
|
||||
you may end up just getting the default Kokkos (i.e. Serial).
|
||||
Some examples are included in the `config/yaml` folder for common platforms.
|
||||
Before running `spack install <package>` we recommend running `spack spec <package>` to confirm your dependency tree is correct.
|
||||
For example, with Kokkos Kernels:
|
||||
````bash
|
||||
|
||||
@ -30,5 +30,5 @@ KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
)
|
||||
|
||||
|
||||
|
||||
KOKKOS_LINK_TPL(kokkoscontainers PUBLIC ROCTHRUST)
|
||||
KOKKOS_LINK_TPL(kokkoscore PUBLIC ONEDPL)
|
||||
|
||||
@ -849,18 +849,17 @@ class Random_XorShift64 {
|
||||
return drand(end - start) + start;
|
||||
}
|
||||
|
||||
// Marsaglia polar method for drawing a standard normal distributed random
|
||||
// Box-Muller method for drawing a standard normal distributed random
|
||||
// number
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double normal() {
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while (S >= 1.0) {
|
||||
U = 2.0 * drand() - 1.0;
|
||||
const double V = 2.0 * drand() - 1.0;
|
||||
S = U * U + V * V;
|
||||
}
|
||||
return U * std::sqrt(-2.0 * std::log(S) / S);
|
||||
constexpr auto two_pi = 2 * Kokkos::numbers::pi_v<double>;
|
||||
|
||||
const double u = drand();
|
||||
const double v = drand();
|
||||
const double r = Kokkos::sqrt(-2.0 * Kokkos::log(u));
|
||||
const double theta = v * two_pi;
|
||||
return r * Kokkos::cos(theta);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -1094,18 +1093,17 @@ class Random_XorShift1024 {
|
||||
return drand(end - start) + start;
|
||||
}
|
||||
|
||||
// Marsaglia polar method for drawing a standard normal distributed random
|
||||
// Box-Muller method for drawing a standard normal distributed random
|
||||
// number
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double normal() {
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while (S >= 1.0) {
|
||||
U = 2.0 * drand() - 1.0;
|
||||
const double V = 2.0 * drand() - 1.0;
|
||||
S = U * U + V * V;
|
||||
}
|
||||
return U * std::sqrt(-2.0 * std::log(S) / S);
|
||||
constexpr auto two_pi = 2 * Kokkos::numbers::pi_v<double>;
|
||||
|
||||
const double u = drand();
|
||||
const double v = drand();
|
||||
const double r = Kokkos::sqrt(-2.0 * Kokkos::log(u));
|
||||
const double theta = v * two_pi;
|
||||
return r * Kokkos::cos(theta);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -1545,13 +1543,23 @@ template <class ViewType, class RandomPool, class IndexType = int64_t>
|
||||
void fill_random(ViewType a, RandomPool g,
|
||||
typename ViewType::const_value_type begin,
|
||||
typename ViewType::const_value_type end) {
|
||||
fill_random(typename ViewType::execution_space{}, a, g, begin, end);
|
||||
Kokkos::fence(
|
||||
"fill_random: fence before since no execution space instance provided");
|
||||
typename ViewType::execution_space exec;
|
||||
fill_random(exec, a, g, begin, end);
|
||||
exec.fence(
|
||||
"fill_random: fence after since no execution space instance provided");
|
||||
}
|
||||
|
||||
template <class ViewType, class RandomPool, class IndexType = int64_t>
|
||||
void fill_random(ViewType a, RandomPool g,
|
||||
typename ViewType::const_value_type range) {
|
||||
fill_random(typename ViewType::execution_space{}, a, g, 0, range);
|
||||
Kokkos::fence(
|
||||
"fill_random: fence before since no execution space instance provided");
|
||||
typename ViewType::execution_space exec;
|
||||
fill_random(exec, a, g, 0, range);
|
||||
exec.fence(
|
||||
"fill_random: fence after since no execution space instance provided");
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
|
||||
#include "sorting/Kokkos_BinSortPublicAPI.hpp"
|
||||
#include "sorting/Kokkos_SortPublicAPI.hpp"
|
||||
#include "sorting/Kokkos_SortByKeyPublicAPI.hpp"
|
||||
#include "sorting/Kokkos_NestedSortPublicAPI.hpp"
|
||||
|
||||
#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT
|
||||
|
||||
@ -35,7 +35,6 @@
|
||||
// following the std classification.
|
||||
|
||||
// modifying ops
|
||||
#include "std_algorithms/Kokkos_Swap.hpp"
|
||||
#include "std_algorithms/Kokkos_IterSwap.hpp"
|
||||
|
||||
// non-modifying sequence
|
||||
|
||||
117
lib/kokkos/algorithms/src/sorting/Kokkos_SortByKeyPublicAPI.hpp
Normal file
117
lib/kokkos/algorithms/src/sorting/Kokkos_SortByKeyPublicAPI.hpp
Normal file
@ -0,0 +1,117 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 4.0
|
||||
// Copyright (2022) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://kokkos.org/LICENSE for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#ifndef KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_
|
||||
#define KOKKOS_SORT_BY_KEY_PUBLIC_API_HPP_
|
||||
|
||||
#include "./impl/Kokkos_SortByKeyImpl.hpp"
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <algorithm>
|
||||
|
||||
namespace Kokkos::Experimental {
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// basic overloads
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties>
|
||||
void sort_by_key(
|
||||
const ExecutionSpace& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
||||
// constraints
|
||||
using KeysType = Kokkos::View<KeysDataType, KeysProperties...>;
|
||||
using ValuesType = Kokkos::View<ValuesDataType, ValuesProperties...>;
|
||||
::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(keys);
|
||||
::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(values);
|
||||
|
||||
static_assert(SpaceAccessibility<ExecutionSpace,
|
||||
typename KeysType::memory_space>::accessible,
|
||||
"Kokkos::sort: execution space instance is not able to access "
|
||||
"the memory space of the keys View argument!");
|
||||
static_assert(
|
||||
SpaceAccessibility<ExecutionSpace,
|
||||
typename ValuesType::memory_space>::accessible,
|
||||
"Kokkos::sort: execution space instance is not able to access "
|
||||
"the memory space of the values View argument!");
|
||||
|
||||
static_assert(KeysType::static_extent(0) == 0 ||
|
||||
ValuesType::static_extent(0) == 0 ||
|
||||
KeysType::static_extent(0) == ValuesType::static_extent(0));
|
||||
if (values.size() != keys.size())
|
||||
Kokkos::abort((std::string("values and keys extents must be the same. The "
|
||||
"values extent is ") +
|
||||
std::to_string(values.size()) + ", and the keys extent is " +
|
||||
std::to_string(keys.size()) + ".")
|
||||
.c_str());
|
||||
|
||||
if (keys.extent(0) <= 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
::Kokkos::Impl::sort_by_key_device_view_without_comparator(exec, keys,
|
||||
values);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
// overloads supporting a custom comparator
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
template <class ExecutionSpace, class ComparatorType, class KeysDataType,
|
||||
class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties>
|
||||
void sort_by_key(
|
||||
const ExecutionSpace& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
const ComparatorType& comparator) {
|
||||
// constraints
|
||||
using KeysType = Kokkos::View<KeysDataType, KeysProperties...>;
|
||||
using ValuesType = Kokkos::View<ValuesDataType, ValuesProperties...>;
|
||||
::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(keys);
|
||||
::Kokkos::Impl::static_assert_is_admissible_to_kokkos_sort_by_key(values);
|
||||
|
||||
static_assert(SpaceAccessibility<ExecutionSpace,
|
||||
typename KeysType::memory_space>::accessible,
|
||||
"Kokkos::sort: execution space instance is not able to access "
|
||||
"the memory space of the keys View argument!");
|
||||
static_assert(
|
||||
SpaceAccessibility<ExecutionSpace,
|
||||
typename ValuesType::memory_space>::accessible,
|
||||
"Kokkos::sort: execution space instance is not able to access "
|
||||
"the memory space of the values View argument!");
|
||||
|
||||
static_assert(KeysType::static_extent(0) == 0 ||
|
||||
ValuesType::static_extent(0) == 0 ||
|
||||
KeysType::static_extent(0) == ValuesType::static_extent(0));
|
||||
if (values.size() != keys.size())
|
||||
Kokkos::abort((std::string("values and keys extents must be the same. The "
|
||||
"values extent is ") +
|
||||
std::to_string(values.size()) + ", and the keys extent is " +
|
||||
std::to_string(keys.size()) + ".")
|
||||
.c_str());
|
||||
|
||||
if (keys.extent(0) <= 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
::Kokkos::Impl::sort_by_key_device_view_with_comparator(exec, keys, values,
|
||||
comparator);
|
||||
}
|
||||
|
||||
} // namespace Kokkos::Experimental
|
||||
#endif
|
||||
@ -29,7 +29,7 @@ namespace Kokkos {
|
||||
// ---------------------------------------------------------------
|
||||
|
||||
template <class ExecutionSpace, class DataType, class... Properties>
|
||||
void sort([[maybe_unused]] const ExecutionSpace& exec,
|
||||
void sort(const ExecutionSpace& exec,
|
||||
const Kokkos::View<DataType, Properties...>& view) {
|
||||
// constraints
|
||||
using ViewType = Kokkos::View<DataType, Properties...>;
|
||||
@ -52,6 +52,7 @@ void sort([[maybe_unused]] const ExecutionSpace& exec,
|
||||
}
|
||||
|
||||
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
|
||||
exec.fence("Kokkos::sort without comparator use std::sort");
|
||||
auto first = ::Kokkos::Experimental::begin(view);
|
||||
auto last = ::Kokkos::Experimental::end(view);
|
||||
std::sort(first, last);
|
||||
@ -82,7 +83,7 @@ void sort(const Kokkos::View<DataType, Properties...>& view) {
|
||||
// ---------------------------------------------------------------
|
||||
template <class ExecutionSpace, class ComparatorType, class DataType,
|
||||
class... Properties>
|
||||
void sort([[maybe_unused]] const ExecutionSpace& exec,
|
||||
void sort(const ExecutionSpace& exec,
|
||||
const Kokkos::View<DataType, Properties...>& view,
|
||||
const ComparatorType& comparator) {
|
||||
// constraints
|
||||
@ -105,6 +106,7 @@ void sort([[maybe_unused]] const ExecutionSpace& exec,
|
||||
}
|
||||
|
||||
if constexpr (Impl::better_off_calling_std_sort_v<ExecutionSpace>) {
|
||||
exec.fence("Kokkos::sort with comparator use std::sort");
|
||||
auto first = ::Kokkos::Experimental::begin(view);
|
||||
auto last = ::Kokkos::Experimental::end(view);
|
||||
std::sort(first, last, comparator);
|
||||
|
||||
@ -18,7 +18,6 @@
|
||||
#define KOKKOS_NESTED_SORT_IMPL_HPP_
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <std_algorithms/Kokkos_Swap.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
@ -99,7 +98,7 @@ KOKKOS_INLINE_FUNCTION void sort_nested_impl(
|
||||
keyView(elem1) = key2;
|
||||
keyView(elem2) = key1;
|
||||
if constexpr (!std::is_same_v<ValueViewType, std::nullptr_t>) {
|
||||
Kokkos::Experimental::swap(valueView(elem1), valueView(elem2));
|
||||
Kokkos::kokkos_swap(valueView(elem1), valueView(elem2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
401
lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp
Normal file
401
lib/kokkos/algorithms/src/sorting/impl/Kokkos_SortByKeyImpl.hpp
Normal file
@ -0,0 +1,401 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 4.0
|
||||
// Copyright (2022) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://kokkos.org/LICENSE for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#ifndef KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_
|
||||
#define KOKKOS_SORT_BY_KEY_FREE_FUNCS_IMPL_HPP_
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
|
||||
// Workaround for `Instruction 'shfl' without '.sync' is not supported on
|
||||
// .target sm_70 and higher from PTX ISA version 6.4`.
|
||||
// Also see https://github.com/NVIDIA/cub/pull/170.
|
||||
#if !defined(CUB_USE_COOPERATIVE_GROUPS)
|
||||
#define CUB_USE_COOPERATIVE_GROUPS
|
||||
#endif
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wshadow"
|
||||
|
||||
#if defined(KOKKOS_COMPILER_CLANG)
|
||||
// Some versions of Clang fail to compile Thrust, failing with errors like
|
||||
// this:
|
||||
// <snip>/thrust/system/cuda/detail/core/agent_launcher.h:557:11:
|
||||
// error: use of undeclared identifier 'va_printf'
|
||||
// The exact combination of versions for Clang and Thrust (or CUDA) for this
|
||||
// failure was not investigated, however even very recent version combination
|
||||
// (Clang 10.0.0 and Cuda 10.0) demonstrated failure.
|
||||
//
|
||||
// Defining _CubLog here locally allows us to avoid that code path, however
|
||||
// disabling some debugging diagnostics
|
||||
#pragma push_macro("_CubLog")
|
||||
#ifdef _CubLog
|
||||
#undef _CubLog
|
||||
#endif
|
||||
#define _CubLog
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
#pragma pop_macro("_CubLog")
|
||||
#else
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
#endif
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL) && \
|
||||
(ONEDPL_VERSION_MAJOR > 2022 || \
|
||||
(ONEDPL_VERSION_MAJOR == 2022 && ONEDPL_VERSION_MINOR >= 2))
|
||||
#define KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
#include <oneapi/dpl/execution>
|
||||
#include <oneapi/dpl/algorithm>
|
||||
#endif
|
||||
|
||||
namespace Kokkos::Impl {
|
||||
|
||||
template <typename T>
|
||||
constexpr inline bool is_admissible_to_kokkos_sort_by_key =
|
||||
::Kokkos::is_view<T>::value&& T::rank() == 1 &&
|
||||
(std::is_same<typename T::traits::array_layout,
|
||||
Kokkos::LayoutLeft>::value ||
|
||||
std::is_same<typename T::traits::array_layout,
|
||||
Kokkos::LayoutRight>::value ||
|
||||
std::is_same<typename T::traits::array_layout,
|
||||
Kokkos::LayoutStride>::value);
|
||||
|
||||
template <class ViewType>
|
||||
KOKKOS_INLINE_FUNCTION constexpr void
|
||||
static_assert_is_admissible_to_kokkos_sort_by_key(const ViewType& /* view */) {
|
||||
static_assert(is_admissible_to_kokkos_sort_by_key<ViewType>,
|
||||
"Kokkos::sort_by_key only accepts 1D values View with "
|
||||
"LayoutRight, LayoutLeft or LayoutStride.");
|
||||
}
|
||||
|
||||
// For the fallback implementation for sort_by_key using Kokkos::sort, we need
|
||||
// to consider if Kokkos::sort defers to the fallback implementation that copies
|
||||
// the array to the host and uses std::sort, see
|
||||
// copy_to_host_run_stdsort_copy_back() in impl/Kokkos_SortImpl.hpp. If
|
||||
// sort_on_device_v is true, we assume that Kokkos::sort doesn't copy data.
|
||||
// Otherwise, we manually copy all data to the host and provide Kokkos::sort
|
||||
// with a host execution space.
|
||||
// Primary template: an (execution space, layout) pair is not treated as
// sorting on the device unless a partial specialization says otherwise.
template <typename ExecutionSpace, typename Layout>
inline constexpr bool sort_on_device_v = false;
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
template <class Layout>
|
||||
inline constexpr bool sort_on_device_v<Kokkos::Cuda, Layout> = true;
|
||||
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties, class... MaybeComparator>
|
||||
void sort_by_key_cudathrust(
|
||||
const Kokkos::Cuda& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
MaybeComparator&&... maybeComparator) {
|
||||
const auto policy = thrust::cuda::par.on(exec.cuda_stream());
|
||||
auto keys_first = ::Kokkos::Experimental::begin(keys);
|
||||
auto keys_last = ::Kokkos::Experimental::end(keys);
|
||||
auto values_first = ::Kokkos::Experimental::begin(values);
|
||||
thrust::sort_by_key(policy, keys_first, keys_last, values_first,
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class Layout>
|
||||
inline constexpr bool sort_on_device_v<Kokkos::HIP, Layout> = true;
|
||||
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties, class... MaybeComparator>
|
||||
void sort_by_key_rocthrust(
|
||||
const Kokkos::HIP& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
MaybeComparator&&... maybeComparator) {
|
||||
const auto policy = thrust::hip::par.on(exec.hip_stream());
|
||||
auto keys_first = ::Kokkos::Experimental::begin(keys);
|
||||
auto keys_last = ::Kokkos::Experimental::end(keys);
|
||||
auto values_first = ::Kokkos::Experimental::begin(values);
|
||||
thrust::sort_by_key(policy, keys_first, keys_last, values_first,
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
template <class Layout>
|
||||
inline constexpr bool sort_on_device_v<Kokkos::Experimental::SYCL, Layout> =
|
||||
std::is_same_v<Layout, Kokkos::LayoutLeft> ||
|
||||
std::is_same_v<Layout, Kokkos::LayoutRight>;
|
||||
|
||||
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties, class... MaybeComparator>
|
||||
void sort_by_key_onedpl(
|
||||
const Kokkos::Experimental::SYCL& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
MaybeComparator&&... maybeComparator) {
|
||||
if (keys.stride(0) != 1 && values.stride(0) != 1) {
|
||||
Kokkos::abort(
|
||||
"SYCL sort_by_key only supports rank-1 Views with stride(0) = 1.");
|
||||
}
|
||||
|
||||
// Can't use Experimental::begin/end here since the oneDPL then assumes that
|
||||
// the data is on the host.
|
||||
auto queue = exec.sycl_queue();
|
||||
auto policy = oneapi::dpl::execution::make_device_policy(queue);
|
||||
const int n = keys.extent(0);
|
||||
oneapi::dpl::sort_by_key(policy, keys.data(), keys.data() + n, values.data(),
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
template <typename ExecutionSpace, typename PermutationView, typename ViewType>
|
||||
void applyPermutation(const ExecutionSpace& space,
|
||||
const PermutationView& permutation,
|
||||
const ViewType& view) {
|
||||
static_assert(std::is_integral<typename PermutationView::value_type>::value);
|
||||
|
||||
auto view_copy = Kokkos::create_mirror(
|
||||
Kokkos::view_alloc(space, typename ExecutionSpace::memory_space{},
|
||||
Kokkos::WithoutInitializing),
|
||||
view);
|
||||
Kokkos::deep_copy(space, view_copy, view);
|
||||
Kokkos::parallel_for(
|
||||
"Kokkos::sort_by_key_via_sort::permute_" + view.label(),
|
||||
Kokkos::RangePolicy<ExecutionSpace>(space, 0, view.extent(0)),
|
||||
KOKKOS_LAMBDA(int i) { view(i) = view_copy(permutation(i)); });
|
||||
}
|
||||
|
||||
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties,
|
||||
class... MaybeComparator>
|
||||
void sort_by_key_via_sort(
|
||||
const ExecutionSpace& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
MaybeComparator&&... maybeComparator) {
|
||||
static_assert(sizeof...(MaybeComparator) <= 1);
|
||||
|
||||
auto const n = keys.size();
|
||||
|
||||
Kokkos::View<unsigned int*, ExecutionSpace> permute(
|
||||
Kokkos::view_alloc(exec, Kokkos::WithoutInitializing,
|
||||
"Kokkos::sort_by_key_via_sort::permute"),
|
||||
n);
|
||||
|
||||
// iota
|
||||
Kokkos::parallel_for(
|
||||
"Kokkos::sort_by_key_via_sort::iota",
|
||||
Kokkos::RangePolicy<ExecutionSpace>(exec, 0, n),
|
||||
KOKKOS_LAMBDA(int i) { permute(i) = i; });
|
||||
|
||||
using Layout =
|
||||
typename Kokkos::View<unsigned int*, ExecutionSpace>::array_layout;
|
||||
if constexpr (!sort_on_device_v<ExecutionSpace, Layout>) {
|
||||
auto host_keys = Kokkos::create_mirror_view(
|
||||
Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing),
|
||||
keys);
|
||||
auto host_permute = Kokkos::create_mirror_view(
|
||||
Kokkos::view_alloc(Kokkos::HostSpace{}, Kokkos::WithoutInitializing),
|
||||
permute);
|
||||
Kokkos::deep_copy(exec, host_keys, keys);
|
||||
Kokkos::deep_copy(exec, host_permute, permute);
|
||||
|
||||
exec.fence("Kokkos::Impl::sort_by_key_via_sort: before host sort");
|
||||
Kokkos::DefaultHostExecutionSpace host_exec;
|
||||
|
||||
if constexpr (sizeof...(MaybeComparator) == 0) {
|
||||
Kokkos::sort(
|
||||
host_exec, host_permute,
|
||||
KOKKOS_LAMBDA(int i, int j) { return host_keys(i) < host_keys(j); });
|
||||
} else {
|
||||
auto keys_comparator =
|
||||
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
|
||||
Kokkos::sort(
|
||||
host_exec, host_permute, KOKKOS_LAMBDA(int i, int j) {
|
||||
return keys_comparator(host_keys(i), host_keys(j));
|
||||
});
|
||||
}
|
||||
host_exec.fence("Kokkos::Impl::sort_by_key_via_sort: after host sort");
|
||||
Kokkos::deep_copy(exec, permute, host_permute);
|
||||
} else {
|
||||
#ifdef KOKKOS_ENABLE_SYCL
|
||||
auto* raw_keys_in_comparator = keys.data();
|
||||
auto stride = keys.stride(0);
|
||||
if constexpr (sizeof...(MaybeComparator) == 0) {
|
||||
Kokkos::sort(
|
||||
exec, permute, KOKKOS_LAMBDA(int i, int j) {
|
||||
return raw_keys_in_comparator[i * stride] <
|
||||
raw_keys_in_comparator[j * stride];
|
||||
});
|
||||
} else {
|
||||
auto keys_comparator =
|
||||
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
|
||||
Kokkos::sort(
|
||||
exec, permute, KOKKOS_LAMBDA(int i, int j) {
|
||||
return keys_comparator(raw_keys_in_comparator[i * stride],
|
||||
raw_keys_in_comparator[j * stride]);
|
||||
});
|
||||
}
|
||||
#else
|
||||
if constexpr (sizeof...(MaybeComparator) == 0) {
|
||||
Kokkos::sort(
|
||||
exec, permute,
|
||||
KOKKOS_LAMBDA(int i, int j) { return keys(i) < keys(j); });
|
||||
} else {
|
||||
auto keys_comparator =
|
||||
std::get<0>(std::tuple<MaybeComparator...>(maybeComparator...));
|
||||
Kokkos::sort(
|
||||
exec, permute, KOKKOS_LAMBDA(int i, int j) {
|
||||
return keys_comparator(keys(i), keys(j));
|
||||
});
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
applyPermutation(exec, permute, keys);
|
||||
applyPermutation(exec, permute, values);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------
|
||||
//
|
||||
// specialize cases for sorting by key without comparator
|
||||
//
|
||||
// ------------------------------------------------------
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties>
|
||||
void sort_by_key_device_view_without_comparator(
|
||||
const Kokkos::Cuda& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
||||
sort_by_key_cudathrust(exec, keys, values);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties>
|
||||
void sort_by_key_device_view_without_comparator(
|
||||
const Kokkos::HIP& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
||||
sort_by_key_rocthrust(exec, keys, values);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
template <class KeysDataType, class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties>
|
||||
void sort_by_key_device_view_without_comparator(
|
||||
const Kokkos::Experimental::SYCL& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
||||
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
if (keys.stride(0) == 1 && values.stride(0) == 1)
|
||||
sort_by_key_onedpl(exec, keys, values);
|
||||
else
|
||||
#endif
|
||||
sort_by_key_via_sort(exec, keys, values);
|
||||
}
|
||||
#endif
|
||||
|
||||
// fallback case
|
||||
template <class ExecutionSpace, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties>
|
||||
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value>
|
||||
sort_by_key_device_view_without_comparator(
|
||||
const ExecutionSpace& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values) {
|
||||
sort_by_key_via_sort(exec, keys, values);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------
|
||||
//
|
||||
// specialize cases for sorting by key with comparator
|
||||
//
|
||||
// ---------------------------------------------------
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
template <class ComparatorType, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties>
|
||||
void sort_by_key_device_view_with_comparator(
|
||||
const Kokkos::Cuda& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
const ComparatorType& comparator) {
|
||||
sort_by_key_cudathrust(exec, keys, values, comparator);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class ComparatorType, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties>
|
||||
void sort_by_key_device_view_with_comparator(
|
||||
const Kokkos::HIP& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
const ComparatorType& comparator) {
|
||||
sort_by_key_rocthrust(exec, keys, values, comparator);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
template <class ComparatorType, class KeysDataType, class... KeysProperties,
|
||||
class ValuesDataType, class... ValuesProperties>
|
||||
void sort_by_key_device_view_with_comparator(
|
||||
const Kokkos::Experimental::SYCL& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
const ComparatorType& comparator) {
|
||||
#ifdef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
if (keys.stride(0) == 1 && values.stride(0) == 1)
|
||||
sort_by_key_onedpl(exec, keys, values, comparator);
|
||||
else
|
||||
#endif
|
||||
sort_by_key_via_sort(exec, keys, values, comparator);
|
||||
}
|
||||
#endif
|
||||
|
||||
// fallback case
|
||||
template <class ComparatorType, class ExecutionSpace, class KeysDataType,
|
||||
class... KeysProperties, class ValuesDataType,
|
||||
class... ValuesProperties>
|
||||
std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value>
|
||||
sort_by_key_device_view_with_comparator(
|
||||
const ExecutionSpace& exec,
|
||||
const Kokkos::View<KeysDataType, KeysProperties...>& keys,
|
||||
const Kokkos::View<ValuesDataType, ValuesProperties...>& values,
|
||||
const ComparatorType& comparator) {
|
||||
sort_by_key_via_sort(exec, keys, values, comparator);
|
||||
}
|
||||
|
||||
#undef KOKKOS_ONEDPL_HAS_SORT_BY_KEY
|
||||
|
||||
} // namespace Kokkos::Impl
|
||||
#endif
|
||||
@ -63,6 +63,11 @@
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
#include <oneapi/dpl/execution>
|
||||
#include <oneapi/dpl/algorithm>
|
||||
@ -184,6 +189,26 @@ void sort_cudathrust(const Cuda& space,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class DataType, class... Properties, class... MaybeComparator>
|
||||
void sort_rocthrust(const HIP& space,
|
||||
const Kokkos::View<DataType, Properties...>& view,
|
||||
MaybeComparator&&... maybeComparator) {
|
||||
using ViewType = Kokkos::View<DataType, Properties...>;
|
||||
static_assert(ViewType::rank == 1,
|
||||
"Kokkos::sort: currently only supports rank-1 Views.");
|
||||
|
||||
if (view.extent(0) <= 1) {
|
||||
return;
|
||||
}
|
||||
const auto exec = thrust::hip::par.on(space.hip_stream());
|
||||
auto first = ::Kokkos::Experimental::begin(view);
|
||||
auto last = ::Kokkos::Experimental::end(view);
|
||||
thrust::sort(exec, first, last,
|
||||
std::forward<MaybeComparator>(maybeComparator)...);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
template <class DataType, class... Properties, class... MaybeComparator>
|
||||
void sort_onedpl(const Kokkos::Experimental::SYCL& space,
|
||||
@ -274,6 +299,14 @@ void sort_device_view_without_comparator(
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class DataType, class... Properties>
|
||||
void sort_device_view_without_comparator(
|
||||
const HIP& exec, const Kokkos::View<DataType, Properties...>& view) {
|
||||
sort_rocthrust(exec, view);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
template <class DataType, class... Properties>
|
||||
void sort_device_view_without_comparator(
|
||||
@ -320,6 +353,15 @@ void sort_device_view_with_comparator(
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCTHRUST)
|
||||
template <class ComparatorType, class DataType, class... Properties>
|
||||
void sort_device_view_with_comparator(
|
||||
const HIP& exec, const Kokkos::View<DataType, Properties...>& view,
|
||||
const ComparatorType& comparator) {
|
||||
sort_rocthrust(exec, view, comparator);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ONEDPL)
|
||||
template <class ComparatorType, class DataType, class... Properties>
|
||||
void sort_device_view_with_comparator(
|
||||
|
||||
@ -50,7 +50,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -66,7 +66,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -93,7 +93,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto copy(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -50,7 +50,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy_backward(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -65,7 +65,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy_backward(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -92,7 +92,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto copy_backward(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -54,7 +54,8 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy_if(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
Predicate pred) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -69,7 +70,8 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy_if(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
Predicate pred) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -96,7 +98,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto copy_if(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest, Predicate pred) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -51,7 +51,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy_n(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -66,7 +66,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto copy_n(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -93,7 +93,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto copy_n(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source, Size count,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -80,7 +80,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
bool equal(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -96,7 +96,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
bool equal(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -111,7 +111,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
bool equal(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
BinaryPredicateType predicate) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
@ -128,7 +128,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
bool equal(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
BinaryPredicateType predicate) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
@ -227,7 +227,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION bool equal(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -243,7 +243,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION bool equal(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
BinaryPredicateType predicate) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -19,7 +19,6 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include "impl/Kokkos_Constraints.hpp"
|
||||
#include "Kokkos_Swap.hpp"
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
@ -33,7 +32,7 @@ struct StdIterSwapFunctor {
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(int i) const {
|
||||
(void)i;
|
||||
::Kokkos::Experimental::swap(*m_a, *m_b);
|
||||
::Kokkos::kokkos_swap(*m_a, *m_b);
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
@ -58,6 +57,16 @@ void iter_swap(IteratorType1 a, IteratorType2 b) {
|
||||
Impl::iter_swap_impl(a, b);
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
|
||||
template <class T>
|
||||
KOKKOS_DEPRECATED_WITH_COMMENT("Use Kokkos::kokkos_swap instead!")
|
||||
KOKKOS_FUNCTION
|
||||
void swap(T& a, T& b) noexcept(::Kokkos::kokkos_swap(std::declval<T&>(),
|
||||
std::declval<T&>())) {
|
||||
::Kokkos::kokkos_swap(a, b);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
|
||||
@ -54,7 +54,7 @@ template <
|
||||
bool lexicographical_compare(
|
||||
const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -71,7 +71,7 @@ template <
|
||||
bool lexicographical_compare(
|
||||
const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -112,7 +112,8 @@ template <
|
||||
bool lexicographical_compare(
|
||||
const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
ComparatorType comp) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -129,7 +130,8 @@ template <
|
||||
bool lexicographical_compare(
|
||||
const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
ComparatorType comp) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -161,7 +163,7 @@ template <class TeamHandleType, class DataType1, class... Properties1,
|
||||
KOKKOS_FUNCTION bool lexicographical_compare(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
@ -187,7 +189,8 @@ template <class TeamHandleType, class DataType1, class... Properties1,
|
||||
KOKKOS_FUNCTION bool lexicographical_compare(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& view1,
|
||||
::Kokkos::View<DataType2, Properties2...>& view2, ComparatorType comp) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& view2,
|
||||
ComparatorType comp) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2);
|
||||
|
||||
|
||||
@ -50,7 +50,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto move(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -64,7 +64,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto move(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -92,7 +92,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto move(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -41,7 +41,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto move_backward(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -65,7 +65,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto move_backward(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -94,7 +94,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto move_backward(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -50,7 +50,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto reverse_copy(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -65,7 +65,7 @@ template <
|
||||
std::enable_if_t<::Kokkos::is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto reverse_copy(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -94,7 +94,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto reverse_copy(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -40,7 +40,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
|
||||
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto swap_ranges(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -64,7 +64,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
|
||||
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto swap_ranges(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -94,7 +94,7 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto swap_ranges(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
|
||||
@ -58,7 +58,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
|
||||
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto transform(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
UnaryOperation unary_op) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
@ -73,7 +73,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
|
||||
std::enable_if_t<is_execution_space_v<ExecutionSpace>, int> = 0>
|
||||
auto transform(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
UnaryOperation unary_op) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
@ -119,7 +119,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
|
||||
auto transform(const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source1,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& source2,
|
||||
::Kokkos::View<DataType3, Properties3...>& dest,
|
||||
const ::Kokkos::View<DataType3, Properties3...>& dest,
|
||||
BinaryOperation binary_op) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);
|
||||
@ -137,7 +137,7 @@ template <typename ExecutionSpace, typename DataType1, typename... Properties1,
|
||||
auto transform(const std::string& label, const ExecutionSpace& ex,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source1,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& source2,
|
||||
::Kokkos::View<DataType3, Properties3...>& dest,
|
||||
const ::Kokkos::View<DataType3, Properties3...>& dest,
|
||||
BinaryOperation binary_op) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);
|
||||
@ -174,7 +174,8 @@ template <typename TeamHandleType, typename DataType1, typename... Properties1,
|
||||
KOKKOS_FUNCTION auto transform(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source,
|
||||
::Kokkos::View<DataType2, Properties2...>& dest, UnaryOperation unary_op) {
|
||||
const ::Kokkos::View<DataType2, Properties2...>& dest,
|
||||
UnaryOperation unary_op) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest);
|
||||
|
||||
@ -207,7 +208,7 @@ KOKKOS_FUNCTION auto transform(
|
||||
const TeamHandleType& teamHandle,
|
||||
const ::Kokkos::View<DataType1, Properties1...>& source1,
|
||||
const ::Kokkos::View<DataType2, Properties2...>& source2,
|
||||
::Kokkos::View<DataType3, Properties3...>& dest,
|
||||
const ::Kokkos::View<DataType3, Properties3...>& dest,
|
||||
BinaryOperation binary_op) {
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source1);
|
||||
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source2);
|
||||
|
||||
@ -47,8 +47,9 @@ struct ExclusiveScanDefaultFunctorForKnownNeutralElement {
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(const IndexType i, ValueType& update,
|
||||
const bool final_pass) const {
|
||||
const auto tmp = m_first_from[i];
|
||||
if (final_pass) m_first_dest[i] = update + m_init_value;
|
||||
update += m_first_from[i];
|
||||
update += tmp;
|
||||
}
|
||||
};
|
||||
|
||||
@ -73,6 +74,7 @@ struct ExclusiveScanDefaultFunctorWithValueWrapper {
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(const IndexType i, value_type& update,
|
||||
const bool final_pass) const {
|
||||
const auto tmp = value_type{m_first_from[i], false};
|
||||
if (final_pass) {
|
||||
if (i == 0) {
|
||||
m_first_dest[i] = m_init_value;
|
||||
@ -81,7 +83,6 @@ struct ExclusiveScanDefaultFunctorWithValueWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
const auto tmp = value_type{m_first_from[i], false};
|
||||
this->join(update, tmp);
|
||||
}
|
||||
|
||||
@ -132,6 +133,7 @@ struct TransformExclusiveScanFunctorWithValueWrapper {
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(const IndexType i, value_type& update,
|
||||
const bool final_pass) const {
|
||||
const auto tmp = value_type{m_unary_op(m_first_from[i]), false};
|
||||
if (final_pass) {
|
||||
if (i == 0) {
|
||||
// for both ExclusiveScan and TransformExclusiveScan,
|
||||
@ -142,7 +144,6 @@ struct TransformExclusiveScanFunctorWithValueWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
const auto tmp = value_type{m_unary_op(m_first_from[i]), false};
|
||||
this->join(update, tmp);
|
||||
}
|
||||
|
||||
@ -190,6 +191,7 @@ struct TransformExclusiveScanFunctorWithoutValueWrapper {
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(const IndexType i, ValueType& update,
|
||||
const bool final_pass) const {
|
||||
const auto tmp = ValueType{m_unary_op(m_first_from[i])};
|
||||
if (final_pass) {
|
||||
if (i == 0) {
|
||||
// for both ExclusiveScan and TransformExclusiveScan,
|
||||
@ -200,7 +202,6 @@ struct TransformExclusiveScanFunctorWithoutValueWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
const auto tmp = ValueType{m_unary_op(m_first_from[i])};
|
||||
this->join(update, tmp);
|
||||
}
|
||||
|
||||
|
||||
@ -46,15 +46,14 @@ struct StdRemoveIfStage1Functor {
|
||||
void operator()(const IndexType i, IndexType& update,
|
||||
const bool final_pass) const {
|
||||
auto& myval = m_first_from[i];
|
||||
if (final_pass) {
|
||||
if (!m_must_remove(myval)) {
|
||||
|
||||
if (!m_must_remove(myval)) {
|
||||
if (final_pass) {
|
||||
// calling move here is ok because we are inside final pass
|
||||
// we are calling move assign as specified by the std
|
||||
m_first_dest[update] = std::move(myval);
|
||||
}
|
||||
}
|
||||
|
||||
if (!m_must_remove(myval)) {
|
||||
update += 1;
|
||||
}
|
||||
}
|
||||
@ -108,7 +107,9 @@ IteratorType remove_if_exespace_impl(const std::string& label,
|
||||
// create helper tmp view
|
||||
using value_type = typename IteratorType::value_type;
|
||||
using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
|
||||
tmp_view_type tmp_view("std_remove_if_tmp_view", keep_count);
|
||||
tmp_view_type tmp_view(Kokkos::view_alloc(Kokkos::WithoutInitializing, ex,
|
||||
"std_remove_if_tmp_view"),
|
||||
keep_count);
|
||||
using tmp_readwrite_iterator_type = decltype(begin(tmp_view));
|
||||
|
||||
// in stage 1, *move* all elements to keep from original range to tmp
|
||||
|
||||
@ -21,7 +21,6 @@
|
||||
#include "Kokkos_Constraints.hpp"
|
||||
#include "Kokkos_HelperPredicates.hpp"
|
||||
#include <std_algorithms/Kokkos_Distance.hpp>
|
||||
#include <std_algorithms/Kokkos_Swap.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace Kokkos {
|
||||
@ -39,7 +38,7 @@ struct StdReverseFunctor {
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(index_type i) const {
|
||||
::Kokkos::Experimental::swap(m_first[i], m_last[-i - 1]);
|
||||
::Kokkos::kokkos_swap(m_first[i], m_last[-i - 1]);
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
|
||||
@ -126,10 +126,11 @@ KOKKOS_FUNCTION IteratorType shift_left_team_impl(
|
||||
// execution space impl because for this team impl we are
|
||||
// within a parallel region, so for now we solve serially
|
||||
|
||||
const std::size_t numElementsToMove =
|
||||
using difference_type = typename IteratorType::difference_type;
|
||||
const difference_type numElementsToMove =
|
||||
::Kokkos::Experimental::distance(first + n, last);
|
||||
Kokkos::single(Kokkos::PerTeam(teamHandle), [=]() {
|
||||
for (std::size_t i = 0; i < numElementsToMove; ++i) {
|
||||
for (difference_type i = 0; i < numElementsToMove; ++i) {
|
||||
first[i] = std::move(first[i + n]);
|
||||
}
|
||||
});
|
||||
|
||||
@ -103,26 +103,6 @@ IteratorType shift_right_exespace_impl(
|
||||
return first + n;
|
||||
}
|
||||
|
||||
template <class Iterator>
|
||||
struct StdShiftRightTeamSingleFunctor {
|
||||
Iterator m_first;
|
||||
Iterator m_last;
|
||||
std::size_t m_shift;
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void operator()() const {
|
||||
// the impl function calling this functor guarantees that
|
||||
// - m_shift is non-negative
|
||||
// - m_first, m_last identify a valid range with m_last > m_first
|
||||
// - m_shift is less than m_last - m_first
|
||||
// so I can safely use std::size_t here
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
StdShiftRightTeamSingleFunctor(Iterator _first, Iterator _last, std::size_t n)
|
||||
: m_first(std::move(_first)), m_last(std::move(_last)), m_shift(n) {}
|
||||
};
|
||||
|
||||
template <class TeamHandleType, class IteratorType>
|
||||
KOKKOS_FUNCTION IteratorType shift_right_team_impl(
|
||||
const TeamHandleType& teamHandle, IteratorType first, IteratorType last,
|
||||
@ -145,10 +125,11 @@ KOKKOS_FUNCTION IteratorType shift_right_team_impl(
|
||||
// execution space impl because for this team impl we are
|
||||
// within a parallel region, so for now we solve serially
|
||||
|
||||
const std::size_t numElementsToMove =
|
||||
using difference_type = typename IteratorType::difference_type;
|
||||
const difference_type numElementsToMove =
|
||||
::Kokkos::Experimental::distance(first, last - n);
|
||||
Kokkos::single(Kokkos::PerTeam(teamHandle), [=]() {
|
||||
for (std::size_t i = 0; i < numElementsToMove; ++i) {
|
||||
for (difference_type i = 0; i < numElementsToMove; ++i) {
|
||||
last[-i - 1] = std::move(last[-n - i - 1]);
|
||||
}
|
||||
});
|
||||
|
||||
@ -21,7 +21,6 @@
|
||||
#include "Kokkos_Constraints.hpp"
|
||||
#include "Kokkos_HelperPredicates.hpp"
|
||||
#include <std_algorithms/Kokkos_Distance.hpp>
|
||||
#include <std_algorithms/Kokkos_Swap.hpp>
|
||||
#include <string>
|
||||
|
||||
namespace Kokkos {
|
||||
@ -36,7 +35,7 @@ struct StdSwapRangesFunctor {
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
void operator()(index_type i) const {
|
||||
::Kokkos::Experimental::swap(m_first1[i], m_first2[i]);
|
||||
::Kokkos::kokkos_swap(m_first1[i], m_first2[i]);
|
||||
}
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
|
||||
@ -105,7 +105,9 @@ IteratorType unique_exespace_impl(const std::string& label,
|
||||
// using the same algorithm used for unique_copy but we now move things
|
||||
using value_type = typename IteratorType::value_type;
|
||||
using tmp_view_type = Kokkos::View<value_type*, ExecutionSpace>;
|
||||
tmp_view_type tmp_view("std_unique_tmp_view", num_elements_to_explore);
|
||||
tmp_view_type tmp_view(Kokkos::view_alloc(ex, Kokkos::WithoutInitializing,
|
||||
"std_unique_tmp_view"),
|
||||
num_elements_to_explore);
|
||||
|
||||
// scan extent is: num_elements_to_explore - 1
|
||||
// for same reason as the one explained in unique_copy
|
||||
|
||||
@ -25,6 +25,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
set(ALGO_SORT_SOURCES)
|
||||
foreach(SOURCE_Input
|
||||
TestSort
|
||||
TestSortByKey
|
||||
TestSortCustomComp
|
||||
TestBinSortA
|
||||
TestBinSortB
|
||||
@ -57,35 +58,37 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
configure_file(${dir}/dummy.cpp ${file})
|
||||
list(APPEND ALGO_RANDOM_SOURCES ${file})
|
||||
endforeach()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std set A
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_A)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std set A
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_A)
|
||||
foreach(Name
|
||||
StdReducers
|
||||
StdAlgorithmsConstraints
|
||||
RandomAccessIterator
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_A Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_A Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std set B
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_B)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std set B
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_B)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsMinMaxElementOps
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_B Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_B Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std set C
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_C)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std set C
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_C)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsLexicographicalCompare
|
||||
StdAlgorithmsForEach
|
||||
@ -100,15 +103,15 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsSearch_n
|
||||
StdAlgorithmsMismatch
|
||||
StdAlgorithmsMoveBackward
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_C Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_C Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std set D
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_D)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std set D
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_D)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsModOps
|
||||
StdAlgorithmsModSeqOps
|
||||
@ -128,15 +131,15 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsReverse
|
||||
StdAlgorithmsShiftLeft
|
||||
StdAlgorithmsShiftRight
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_D Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_D Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std set E
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_E)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std set E
|
||||
# ------------------------------------------
|
||||
set(STDALGO_SOURCES_E)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsIsSorted
|
||||
StdAlgorithmsIsSortedUntil
|
||||
@ -149,83 +152,83 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsTransformUnaryOp
|
||||
StdAlgorithmsTransformExclusiveScan
|
||||
StdAlgorithmsTransformInclusiveScan
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_E Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_SOURCES_E Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team Q
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_Q)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team Q
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_Q)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamInclusiveScan
|
||||
StdAlgorithmsTeamTransformInclusiveScan
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_Q Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_Q Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team P
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_P)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team P
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_P)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamExclusiveScan
|
||||
StdAlgorithmsTeamTransformExclusiveScan
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_P Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_P Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team M
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_M)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team M
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_M)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamTransformUnaryOp
|
||||
StdAlgorithmsTeamTransformBinaryOp
|
||||
StdAlgorithmsTeamGenerate
|
||||
StdAlgorithmsTeamGenerate_n
|
||||
StdAlgorithmsTeamSwapRanges
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_M Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_M Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team L
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_L)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team L
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_L)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamIsSorted
|
||||
StdAlgorithmsTeamIsSortedUntil
|
||||
StdAlgorithmsTeamIsPartitioned
|
||||
StdAlgorithmsTeamPartitionCopy
|
||||
StdAlgorithmsTeamPartitionPoint
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_L Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_L Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team I
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_I)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team I
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_I)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamUnique
|
||||
StdAlgorithmsTeamAdjacentDifference
|
||||
StdAlgorithmsTeamReduce
|
||||
StdAlgorithmsTeamTransformReduce
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_I Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_I Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team H
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_H)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team H
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_H)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamCopy
|
||||
StdAlgorithmsTeamCopy_n
|
||||
@ -236,43 +239,43 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsTeamRemoveIf
|
||||
StdAlgorithmsTeamRemoveCopy
|
||||
StdAlgorithmsTeamRemoveCopyIf
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_H Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_H Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team G
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_G)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team G
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_G)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamMove
|
||||
StdAlgorithmsTeamMoveBackward
|
||||
StdAlgorithmsTeamShiftLeft
|
||||
StdAlgorithmsTeamShiftRight
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_G Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_G Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team F
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_F)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team F
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_F)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamReverse
|
||||
StdAlgorithmsTeamReverseCopy
|
||||
StdAlgorithmsTeamRotate
|
||||
StdAlgorithmsTeamRotateCopy
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_F Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_F Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team E
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_E)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team E
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_E)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamFill
|
||||
StdAlgorithmsTeamFill_n
|
||||
@ -280,28 +283,28 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsTeamReplaceIf
|
||||
StdAlgorithmsTeamReplaceCopy
|
||||
StdAlgorithmsTeamReplaceCopyIf
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_E Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_E Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team D
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_D)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team D
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_D)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamMinElement
|
||||
StdAlgorithmsTeamMaxElement
|
||||
StdAlgorithmsTeamMinMaxElement
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_D Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_D Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team C
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_C)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team C
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_C)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamFind
|
||||
StdAlgorithmsTeamFindIf
|
||||
@ -310,29 +313,29 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsTeamAnyOf
|
||||
StdAlgorithmsTeamNoneOf
|
||||
StdAlgorithmsTeamSearchN
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_C Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_C Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team B
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_B)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team B
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_B)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamEqual
|
||||
StdAlgorithmsTeamSearch
|
||||
StdAlgorithmsTeamFindEnd
|
||||
StdAlgorithmsTeamFindFirstOf
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_B Test${Name}.cpp)
|
||||
endforeach()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_B Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# ------------------------------------------
|
||||
# std team A
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_A)
|
||||
foreach(Name
|
||||
# ------------------------------------------
|
||||
# std team A
|
||||
# ------------------------------------------
|
||||
set(STDALGO_TEAM_SOURCES_A)
|
||||
foreach(Name
|
||||
StdAlgorithmsCommon
|
||||
StdAlgorithmsTeamAdjacentFind
|
||||
StdAlgorithmsTeamCount
|
||||
@ -341,11 +344,8 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;HIP;SYCL;OpenMPTarget)
|
||||
StdAlgorithmsTeamForEachN
|
||||
StdAlgorithmsTeamLexicographicalCompare
|
||||
StdAlgorithmsTeamMismatch
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_A Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
endif()
|
||||
)
|
||||
list(APPEND STDALGO_TEAM_SOURCES_A Test${Name}.cpp)
|
||||
endforeach()
|
||||
|
||||
# FIXME_OPENMPTARGET - remove sort test as it leads to ICE with clang/16 and above at compile time.
|
||||
|
||||
@ -27,13 +27,13 @@ TARGETS =
|
||||
|
||||
tmp := $(foreach device, $(KOKKOS_DEVICELIST), \
|
||||
$(if $(filter Test$(device).cpp, $(shell ls Test$(device).cpp 2>/dev/null)),,\
|
||||
$(shell echo "\#include <Test"${device}"_Category.hpp>" > Test$(device).cpp); \
|
||||
$(shell echo "\#include <TestRandom.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "\#include <TestSort.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "\#include <TestBinSortA.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "\#include <TestBinSortB.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "\#include <TestNestedSort.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "\#include <TestSortCustomComp.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <Test"${device}"_Category.hpp>" > Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <TestRandom.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <TestSort.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <TestBinSortA.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <TestBinSortB.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <TestNestedSort.hpp>" >> Test$(device).cpp); \
|
||||
$(shell echo "$(H)include <TestSortCustomComp.hpp>" >> Test$(device).cpp); \
|
||||
) \
|
||||
)
|
||||
|
||||
|
||||
241
lib/kokkos/algorithms/unit_tests/TestSortByKey.hpp
Normal file
241
lib/kokkos/algorithms/unit_tests/TestSortByKey.hpp
Normal file
@ -0,0 +1,241 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 4.0
|
||||
// Copyright (2022) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://kokkos.org/LICENSE for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_BY_KEY_HPP
|
||||
#define KOKKOS_ALGORITHMS_UNITTESTS_TEST_SORT_BY_KEY_HPP
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Random.hpp>
|
||||
#include <Kokkos_Sort.hpp>
|
||||
|
||||
#include <utility> // pair
|
||||
|
||||
namespace Test {
|
||||
namespace SortImpl {
|
||||
|
||||
struct Less {
|
||||
template <class ValueType>
|
||||
KOKKOS_INLINE_FUNCTION bool operator()(const ValueType &lhs,
|
||||
const ValueType &rhs) const {
|
||||
return lhs < rhs;
|
||||
}
|
||||
};
|
||||
|
||||
struct Greater {
|
||||
template <class ValueType>
|
||||
KOKKOS_INLINE_FUNCTION bool operator()(const ValueType &lhs,
|
||||
const ValueType &rhs) const {
|
||||
return lhs > rhs;
|
||||
}
|
||||
};
|
||||
|
||||
template <class ExecutionSpace, class Keys, class Permute,
|
||||
class Comparator = Less>
|
||||
struct is_sorted_by_key_struct {
|
||||
Keys keys;
|
||||
Keys keys_orig;
|
||||
Permute permute;
|
||||
Comparator comparator;
|
||||
|
||||
is_sorted_by_key_struct(Keys keys_, Keys keys_orig_, Permute permute_,
|
||||
Comparator comparator_ = Comparator{})
|
||||
: keys(keys_),
|
||||
keys_orig(keys_orig_),
|
||||
permute(permute_),
|
||||
comparator(comparator_) {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(int i, unsigned int &count) const {
|
||||
if (i < keys.extent_int(0) - 1 && comparator(keys(i + 1), keys(i))) ++count;
|
||||
if (keys(i) != keys_orig(permute(i))) ++count;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename ExecutionSpace, typename ViewType>
|
||||
void iota(ExecutionSpace const &space, ViewType const &v,
|
||||
typename ViewType::value_type value = 0) {
|
||||
using ValueType = typename ViewType::value_type;
|
||||
Kokkos::parallel_for(
|
||||
"ArborX::Algorithms::iota",
|
||||
Kokkos::RangePolicy<ExecutionSpace>(space, 0, v.extent(0)),
|
||||
KOKKOS_LAMBDA(int i) { v(i) = value + (ValueType)i; });
|
||||
}
|
||||
|
||||
} // namespace SortImpl
|
||||
|
||||
// sort_by_key must accept zero-length key/value views without throwing.
TEST(TEST_CATEGORY, SortByKeyEmptyView) {
  using ExecutionSpace = TEST_EXECSPACE;
  using KeyView        = Kokkos::View<int *, ExecutionSpace>;
  using ValueView      = Kokkos::View<float *, ExecutionSpace>;

  // The element types are arbitrary here; only the zero extent matters.
  KeyView keys("keys", 0);
  ValueView values("values", 0);

  ASSERT_NO_THROW(
      Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values));
}
|
||||
|
||||
// Sorts several small key sets with the default ordering while permuting an
// iota-initialised value view alongside, then verifies on the device that the
// keys are ordered and that the permutation maps back to the original keys.
TEST(TEST_CATEGORY, SortByKey) {
  using ExecutionSpace = TEST_EXECSPACE;
  using MemorySpace    = typename ExecutionSpace::memory_space;
  using HostInputView =
      Kokkos::View<int *, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;

  ExecutionSpace space{};

  // Each vector is taken by value so that a mutable data() pointer is
  // available for the unmanaged host view below.
  for (auto keys_vector : {std::vector<int>{36, 19, 25, 17, 3, 7, 1, 2, 9},
                           std::vector<int>{36, 19, 25, 17, 3, 9, 1, 2, 7},
                           std::vector<int>{100, 19, 36, 17, 3, 25, 1, 2, 7},
                           std::vector<int>{15, 5, 11, 3, 4, 8}}) {
    auto const n = keys_vector.size();

    // Wrap the host data and bring it into the test's memory space.
    HostInputView host_keys(keys_vector.data(), n);
    auto keys = Kokkos::create_mirror_view_and_copy(MemorySpace{}, host_keys);

    // Keep an untouched copy of the keys for the verification step.
    auto keys_orig = Kokkos::create_mirror(space, keys);
    Kokkos::deep_copy(space, keys_orig, keys);

    // Values start as 0, 1, 2, ... so they record the applied permutation.
    Kokkos::View<int *, ExecutionSpace> permute("permute", n);
    SortImpl::iota(space, permute);

    Kokkos::Experimental::sort_by_key(space, keys, permute);

    using Checker =
        SortImpl::is_sorted_by_key_struct<ExecutionSpace, decltype(keys),
                                          decltype(permute)>;
    unsigned int sort_fails = 0;
    Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
                            Checker(keys, keys_orig, permute), sort_fails);

    ASSERT_EQ(sort_fails, 0u);
  }
}
|
||||
|
||||
// Same scenario as the default-ordering test, but sorting with
// SortImpl::Greater and verifying against that same comparator.
TEST(TEST_CATEGORY, SortByKeyWithComparator) {
  using ExecutionSpace = TEST_EXECSPACE;
  using MemorySpace    = typename ExecutionSpace::memory_space;
  using HostInputView =
      Kokkos::View<int *, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;

  ExecutionSpace space{};

  SortImpl::Greater comparator;

  // Each vector is taken by value so that a mutable data() pointer is
  // available for the unmanaged host view below.
  for (auto keys_vector : {std::vector<int>{36, 19, 25, 17, 3, 7, 1, 2, 9},
                           std::vector<int>{36, 19, 25, 17, 3, 9, 1, 2, 7},
                           std::vector<int>{100, 19, 36, 17, 3, 25, 1, 2, 7},
                           std::vector<int>{15, 5, 11, 3, 4, 8}}) {
    auto const n = keys_vector.size();

    // Wrap the host data and bring it into the test's memory space.
    HostInputView host_keys(keys_vector.data(), n);
    auto keys = Kokkos::create_mirror_view_and_copy(MemorySpace{}, host_keys);

    // Keep an untouched copy of the keys for the verification step.
    auto keys_orig = Kokkos::create_mirror(space, keys);
    Kokkos::deep_copy(space, keys_orig, keys);

    // Values start as 0, 1, 2, ... so they record the applied permutation.
    Kokkos::View<int *, ExecutionSpace> permute("permute", n);
    SortImpl::iota(space, permute);

    Kokkos::Experimental::sort_by_key(space, keys, permute, comparator);

    using Checker =
        SortImpl::is_sorted_by_key_struct<ExecutionSpace, decltype(keys),
                                          decltype(permute),
                                          SortImpl::Greater>;
    unsigned int sort_fails = 0;
    Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
                            Checker(keys, keys_orig, permute, comparator),
                            sort_fails);

    ASSERT_EQ(sort_fails, 0u);
  }
}
|
||||
|
||||
// sort_by_key must accept keys with a compile-time extent, paired either with
// values of the same static extent or with run-time sized values of equal
// length, without throwing.
TEST(TEST_CATEGORY, SortByKeyStaticExtents) {
  using ExecutionSpace = TEST_EXECSPACE;
  using StaticView     = Kokkos::View<int[10], ExecutionSpace>;
  using DynamicView    = Kokkos::View<int *, ExecutionSpace>;

  ExecutionSpace space{};

  StaticView keys("keys");

  // static keys + static values
  StaticView values_static("values_static");
  ASSERT_NO_THROW(
      Kokkos::Experimental::sort_by_key(space, keys, values_static));

  // static keys + dynamic values of matching length
  DynamicView values_dynamic("values_dynamic", 10);
  ASSERT_NO_THROW(
      Kokkos::Experimental::sort_by_key(space, keys, values_dynamic));
}
|
||||
|
||||
template <typename ExecutionSpace, typename Keys, typename Values>
|
||||
void buildViewsForStrided(ExecutionSpace const &space, int n, Keys &keys,
|
||||
Values &values) {
|
||||
Kokkos::parallel_for(
|
||||
"create_data",
|
||||
Kokkos::MDRangePolicy<Kokkos::Rank<3>, ExecutionSpace>(space, {0, 0, 0},
|
||||
{n, n, n}),
|
||||
KOKKOS_LAMBDA(int i, int j, int k) {
|
||||
keys(i, j, k) = n - i;
|
||||
values(i, j, k) = j;
|
||||
});
|
||||
}
|
||||
|
||||
// Runs sort_by_key on rank-1 subviews cut out of rank-3 views along different
// dimensions, then verifies ordering and permutation consistency.
TEST(TEST_CATEGORY, SortByKeyWithStrides) {
  using ExecutionSpace = TEST_EXECSPACE;
  using View3D         = Kokkos::View<int ***, ExecutionSpace>;

  ExecutionSpace space{};

  auto const n = 10;

  View3D keys("keys", n, n, n);
  View3D values("values", n, n, n);
  buildViewsForStrided(space, n, keys, values);

  // Keys vary along the first index, values along the second one; the values
  // slice therefore holds 0, 1, ..., n-1 and doubles as the permutation.
  auto keys_sub   = Kokkos::subview(keys, Kokkos::ALL(), 1, 2);
  auto values_sub = Kokkos::subview(values, 4, Kokkos::ALL(), 6);

  // Keep an untouched copy of the keys for the verification step.
  auto keys_orig = Kokkos::create_mirror(space, keys_sub);
  Kokkos::deep_copy(space, keys_orig, keys_sub);

  Kokkos::Experimental::sort_by_key(space, keys_sub, values_sub);

  using Checker =
      SortImpl::is_sorted_by_key_struct<ExecutionSpace, decltype(keys_sub),
                                        decltype(values_sub)>;
  unsigned int sort_fails = 0;
  Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecutionSpace>(space, 0, n),
                          Checker(keys_sub, keys_orig, values_sub),
                          sort_fails);

  ASSERT_EQ(sort_fails, 0u);
}
|
||||
|
||||
// Keys and values with different extents must abort with the documented
// diagnostic, both with the default ordering and with a custom comparator.
TEST(TEST_CATEGORY, SortByKeyKeysLargerThanValues) {
  using ExecutionSpace = TEST_EXECSPACE;
  using KeyView        = Kokkos::View<int *, ExecutionSpace>;
  using ValueView      = Kokkos::View<float *, ExecutionSpace>;

  // The element types are arbitrary here; only the extent mismatch matters.
  KeyView keys("keys", 3);
  ValueView values("values", 1);

  // default ordering
  ASSERT_DEATH(
      Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values),
      "values and keys extents must be the same");

  // user-supplied comparator
  ASSERT_DEATH(Kokkos::Experimental::sort_by_key(ExecutionSpace(), keys, values,
                                                 SortImpl::Greater{}),
               "values and keys extents must be the same");
}
|
||||
|
||||
} // namespace Test
|
||||
#endif
|
||||
@ -239,16 +239,8 @@ KOKKOS_FUNCTION bool team_members_have_matching_result(
|
||||
// set accum to 1 if a mismatch is found
|
||||
const bool mismatch = memberValue != target;
|
||||
int accum = static_cast<int>(mismatch);
|
||||
// FIXME_OPENMPTARGET: team API does not meet the TeamHandle concept and
|
||||
// ignores the reducer passed
|
||||
#if defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
Kokkos::Sum<int> dummyReducer(accum);
|
||||
const auto result = teamHandle.team_reduce(accum, dummyReducer);
|
||||
return (result == 0);
|
||||
#else
|
||||
teamHandle.team_reduce(Kokkos::Sum<int>(accum));
|
||||
return (accum == 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class ValueType1, class ValueType2>
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
#include <TestStdAlgorithmsCommon.hpp>
|
||||
#include <utility>
|
||||
#include <iomanip>
|
||||
|
||||
namespace Test {
|
||||
namespace stdalgos {
|
||||
@ -132,47 +133,6 @@ void my_host_exclusive_scan(it1 first, it1 last, it2 dest, ValType init,
|
||||
}
|
||||
}
|
||||
|
||||
template <class ViewType1, class ViewType2, class ValueType, class BinaryOp>
|
||||
void verify_data(ViewType1 data_view, // contains data
|
||||
ViewType2 test_view, // the view to test
|
||||
ValueType init_value, BinaryOp bop) {
|
||||
//! always careful because views might not be deep copyable
|
||||
|
||||
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
|
||||
auto data_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
|
||||
|
||||
using gold_view_value_type = typename ViewType2::value_type;
|
||||
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
|
||||
"goldh", data_view.extent(0));
|
||||
my_host_exclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
|
||||
KE::begin(gold_h), init_value, bop);
|
||||
|
||||
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
|
||||
auto test_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||
if (test_view_h.extent(0) > 0) {
|
||||
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
|
||||
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
|
||||
// << gold_h(i) << " " << test_view_h(i) << " "
|
||||
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
|
||||
if (std::is_same<gold_view_value_type, int>::value) {
|
||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||
} else {
|
||||
const auto error =
|
||||
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
|
||||
if (error > 1e-10) {
|
||||
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
|
||||
<< " " << gold_h(i) << " " << test_view_h(i) << " "
|
||||
<< std::abs(static_cast<double>(gold_h(i) - test_view_h(i)))
|
||||
<< std::endl;
|
||||
}
|
||||
EXPECT_LT(error, 1e-10);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class ValueType>
|
||||
struct MultiplyFunctor {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -189,107 +149,153 @@ struct SumFunctor {
|
||||
}
|
||||
};
|
||||
|
||||
struct VerifyData {
|
||||
template <class ViewType1, class ViewType2, class ValueType, class BinaryOp>
|
||||
void operator()(ViewType1 data_view, // contains data
|
||||
ViewType2 test_view, // the view to test
|
||||
ValueType init_value, BinaryOp bop) {
|
||||
//! always careful because views might not be deep copyable
|
||||
|
||||
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
|
||||
auto data_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
|
||||
|
||||
using gold_view_value_type = typename ViewType2::value_type;
|
||||
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
|
||||
"goldh", data_view.extent(0));
|
||||
my_host_exclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
|
||||
KE::begin(gold_h), init_value, bop);
|
||||
|
||||
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
|
||||
auto test_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||
if (test_view_h.extent(0) > 0) {
|
||||
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
|
||||
if (std::is_same<gold_view_value_type, int>::value) {
|
||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||
} else {
|
||||
const auto error =
|
||||
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
|
||||
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
|
||||
<< static_cast<double>(test_view_h(i)) << " "
|
||||
<< static_cast<double>(gold_h(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class ViewType1, class ViewType2, class ValueType>
|
||||
void operator()(ViewType1 data_view, // contains data
|
||||
ViewType2 test_view, // the view to test
|
||||
ValueType init_value) {
|
||||
(*this)(data_view, test_view, init_value, SumFunctor<ValueType>());
|
||||
}
|
||||
};
|
||||
|
||||
// Name of the `int` value type; the overload is selected by the type of the
// (otherwise unused) argument.
std::string value_type_to_string(int /*tag*/) { return std::string("int"); }
|
||||
|
||||
// Name of the `double` value type; the overload is selected by the type of
// the (otherwise unused) argument.
std::string value_type_to_string(double /*tag*/) {
  return std::string("double");
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType>
|
||||
void run_single_scenario_default_op(const InfoType& scenario_info,
|
||||
ValueType init_value) {
|
||||
using default_op = SumFunctor<ValueType>;
|
||||
template <class Tag, class ValueType, class InfoType, class... OpOrEmpty>
|
||||
void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
|
||||
OpOrEmpty... empty_or_op) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
// std::cout << "exclusive_scan default op: " << name << ", "
|
||||
// << view_tag_to_string(Tag{}) << ", "
|
||||
// << value_type_to_string(ValueType()) << ", "
|
||||
// << "init = " << init_value << std::endl;
|
||||
|
||||
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
|
||||
auto view_from = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
|
||||
fill_view(view_from, name);
|
||||
// view_dest is filled with zeros before calling the algorithm everytime to
|
||||
// ensure the algorithm does something meaningful
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest),
|
||||
init_value);
|
||||
init_value, empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, default_op());
|
||||
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest),
|
||||
init_value);
|
||||
init_value, empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, default_op());
|
||||
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value);
|
||||
auto r = KE::exclusive_scan(exespace(), view_from, view_dest, init_value,
|
||||
empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, default_op());
|
||||
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest,
|
||||
init_value);
|
||||
init_value, empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, default_op());
|
||||
VerifyData()(view_from, view_dest, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType, class BinaryOp>
|
||||
void run_single_scenario_custom_op(const InfoType& scenario_info,
|
||||
ValueType init_value, BinaryOp bop) {
|
||||
template <class Tag, class ValueType, class InfoType, class... OpOrEmpty>
|
||||
void run_single_scenario_inplace(const InfoType& scenario_info,
|
||||
ValueType init_value,
|
||||
OpOrEmpty... empty_or_op) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
// std::cout << "exclusive_scan custom op: " << name << ", "
|
||||
// << view_tag_to_string(Tag{}) << ", "
|
||||
// << value_type_to_string(ValueType()) << ", "
|
||||
// << "init = " << init_value << std::endl;
|
||||
|
||||
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
|
||||
auto view_from = create_view<ValueType>(Tag{}, view_ext, "exclusive_scan");
|
||||
fill_view(view_from, name);
|
||||
// since here we call the in-place operation, we need to use two views:
|
||||
// view1: filled according to what the scenario asks for and is not modified
|
||||
// view2: filled according to what the scenario asks for and used for the
|
||||
// in-place op Therefore, after the op is done, view2 should contain the
|
||||
// result of doing exclusive scan NOTE: view2 is filled below every time
|
||||
// because the algorithm acts in place
|
||||
|
||||
auto view1 =
|
||||
create_view<ValueType>(Tag{}, view_ext, "exclusive_scan_inplace_view1");
|
||||
fill_view(view1, name);
|
||||
|
||||
auto view2 =
|
||||
create_view<ValueType>(Tag{}, view_ext, "exclusive_scan_inplace_view2");
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan(exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest),
|
||||
init_value, bop);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, bop);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::exclusive_scan(exespace(), KE::cbegin(view2), KE::cend(view2),
|
||||
KE::begin(view2), init_value, empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest),
|
||||
init_value, bop);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, bop);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::exclusive_scan("label", exespace(), KE::cbegin(view2),
|
||||
KE::cend(view2), KE::begin(view2), init_value,
|
||||
empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r =
|
||||
KE::exclusive_scan(exespace(), view_from, view_dest, init_value, bop);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, bop);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::exclusive_scan(exespace(), view2, view2, init_value,
|
||||
empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::exclusive_scan("label", exespace(), view_from, view_dest,
|
||||
init_value, bop);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, init_value, bop);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::exclusive_scan("label", exespace(), view2, view2, init_value,
|
||||
empty_or_op...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, init_value, empty_or_op...);
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
@ -303,34 +309,39 @@ void run_exclusive_scan_all_scenarios() {
|
||||
{"medium", 1103}, {"large", 10513}};
|
||||
|
||||
for (const auto& it : scenarios) {
|
||||
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{0});
|
||||
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{1});
|
||||
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{-2});
|
||||
run_single_scenario_default_op<Tag, ValueType>(it, ValueType{3});
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{0});
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{1});
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{-2});
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{3});
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0});
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2});
|
||||
|
||||
#if !defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
// custom multiply op is only run for small views otherwise it overflows
|
||||
if (it.first == "small-a" || it.first == "small-b") {
|
||||
using custom_bop_t = MultiplyFunctor<ValueType>;
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{0},
|
||||
custom_bop_t());
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{1},
|
||||
custom_bop_t());
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{-2},
|
||||
custom_bop_t());
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{3},
|
||||
custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{0}, custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{1}, custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{-2}, custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{3}, custom_bop_t());
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0},
|
||||
custom_bop_t());
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2},
|
||||
custom_bop_t());
|
||||
}
|
||||
|
||||
using custom_bop_t = SumFunctor<ValueType>;
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{0},
|
||||
custom_bop_t());
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{1},
|
||||
custom_bop_t());
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{-2},
|
||||
custom_bop_t());
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, ValueType{3},
|
||||
custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{0}, custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{1}, custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{-2}, custom_bop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{3}, custom_bop_t());
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0},
|
||||
custom_bop_t());
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2},
|
||||
custom_bop_t());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
#include <TestStdAlgorithmsCommon.hpp>
|
||||
#include <utility>
|
||||
#include <iomanip>
|
||||
|
||||
namespace Test {
|
||||
namespace stdalgos {
|
||||
@ -143,51 +144,6 @@ void my_host_inclusive_scan(it1 first, it1 last, it2 dest, BinOp bop,
|
||||
}
|
||||
}
|
||||
|
||||
template <class ViewType1, class ViewType2, class BinaryOp, class... Args>
|
||||
void verify_data(ViewType1 data_view, // contains data
|
||||
ViewType2 test_view, // the view to test
|
||||
BinaryOp bop, Args... args /* copy on purpose */) {
|
||||
//! always careful because views might not be deep copyable
|
||||
|
||||
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
|
||||
auto data_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
|
||||
|
||||
using gold_view_value_type = typename ViewType2::value_type;
|
||||
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
|
||||
"goldh", data_view.extent(0));
|
||||
my_host_inclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
|
||||
KE::begin(gold_h), bop, args...);
|
||||
|
||||
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
|
||||
auto test_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||
|
||||
const auto ext = test_view_h.extent(0);
|
||||
if (ext > 0) {
|
||||
for (std::size_t i = 0; i < ext; ++i) {
|
||||
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
|
||||
// << gold_h(i) << " " << test_view_h(i) << " "
|
||||
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
|
||||
|
||||
if (std::is_same<gold_view_value_type, int>::value) {
|
||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||
} else {
|
||||
const auto error =
|
||||
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
|
||||
if (error > 1e-10) {
|
||||
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
|
||||
<< " " << gold_h(i) << " " << test_view_h(i) << " "
|
||||
<< std::abs(static_cast<double>(gold_h(i) - test_view_h(i)))
|
||||
<< std::endl;
|
||||
}
|
||||
EXPECT_LT(error, 1e-10);
|
||||
}
|
||||
}
|
||||
// std::cout << " last el: " << test_view_h(ext-1) << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
template <class ValueType>
|
||||
struct MultiplyFunctor {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -204,107 +160,151 @@ struct SumFunctor {
|
||||
}
|
||||
};
|
||||
|
||||
struct VerifyData {
|
||||
template <class ViewType1, class ViewType2, class BinaryOp, class... Args>
|
||||
void operator()(ViewType1 data_view, // contains data
|
||||
ViewType2 test_view, // the view to test
|
||||
BinaryOp bop, Args... args /* copy on purpose */) {
|
||||
//! always careful because views might not be deep copyable
|
||||
|
||||
auto data_view_dc = create_deep_copyable_compatible_clone(data_view);
|
||||
auto data_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), data_view_dc);
|
||||
|
||||
using gold_view_value_type = typename ViewType2::value_type;
|
||||
Kokkos::View<gold_view_value_type*, Kokkos::HostSpace> gold_h(
|
||||
"goldh", data_view.extent(0));
|
||||
my_host_inclusive_scan(KE::cbegin(data_view_h), KE::cend(data_view_h),
|
||||
KE::begin(gold_h), bop, args...);
|
||||
|
||||
auto test_view_dc = create_deep_copyable_compatible_clone(test_view);
|
||||
auto test_view_h =
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||
|
||||
const auto ext = test_view_h.extent(0);
|
||||
if (ext > 0) {
|
||||
for (std::size_t i = 0; i < ext; ++i) {
|
||||
if (std::is_same<gold_view_value_type, int>::value) {
|
||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||
} else {
|
||||
const auto error =
|
||||
std::abs(static_cast<double>(gold_h(i) - test_view_h(i)));
|
||||
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
|
||||
<< static_cast<double>(test_view_h(i)) << " "
|
||||
<< static_cast<double>(gold_h(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class ViewType1, class ViewType2>
|
||||
void operator()(ViewType1 data_view, // contains data
|
||||
ViewType2 test_view) // the view to test
|
||||
{
|
||||
using value_type = typename ViewType1::non_const_value_type;
|
||||
(*this)(data_view, test_view, SumFunctor<value_type>());
|
||||
}
|
||||
};
|
||||
|
||||
// Name of the `int` value type; the overload is selected by the type of the
// (otherwise unused) argument.
std::string value_type_to_string(int /*tag*/) { return std::string("int"); }
|
||||
// Name of the `double` value type; the overload is selected by the type of
// the (otherwise unused) argument.
std::string value_type_to_string(double /*tag*/) {
  return std::string("double");
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType>
|
||||
void run_single_scenario_default_op(const InfoType& scenario_info) {
|
||||
using default_op = SumFunctor<ValueType>;
|
||||
template <class Tag, class ValueType, class InfoType, class... Args>
|
||||
void run_single_scenario(const InfoType& scenario_info,
|
||||
Args... args /* copy on purpose */) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
// std::cout << "inclusive_scan default op: " << name << ", "
|
||||
// << view_tag_to_string(Tag{}) << ", "
|
||||
// << value_type_to_string(ValueType()) << std::endl;
|
||||
|
||||
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
|
||||
auto view_from = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
|
||||
fill_view(view_from, name);
|
||||
// view_dest is filled with zeros before calling the algorithm everytime to
|
||||
// ensure the algorithm does something meaningful
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest));
|
||||
auto r =
|
||||
KE::inclusive_scan(exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest), args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, default_op());
|
||||
VerifyData()(view_from, view_dest, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest));
|
||||
auto r =
|
||||
KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest), args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, default_op());
|
||||
VerifyData()(view_from, view_dest, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan(exespace(), view_from, view_dest);
|
||||
auto r = KE::inclusive_scan(exespace(), view_from, view_dest, args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, default_op());
|
||||
VerifyData()(view_from, view_dest, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest);
|
||||
auto r =
|
||||
KE::inclusive_scan("label", exespace(), view_from, view_dest, args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, default_op());
|
||||
VerifyData()(view_from, view_dest, args...);
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType, class BinaryOp,
|
||||
class... Args>
|
||||
void run_single_scenario_custom_op(const InfoType& scenario_info, BinaryOp bop,
|
||||
Args... args /* copy on purpose */) {
|
||||
template <class Tag, class ValueType, class InfoType, class... Args>
|
||||
void run_single_scenario_inplace(const InfoType& scenario_info,
|
||||
Args... args /* copy on purpose */) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
|
||||
// if (1 == sizeof...(Args)) {
|
||||
// std::cout << "inclusive_scan custom op and init value: " << name << ", "
|
||||
// << view_tag_to_string(Tag{}) << ", "
|
||||
// << value_type_to_string(ValueType()) << ", " << std::endl;
|
||||
// } else {
|
||||
// std::cout << "inclusive_scan custom op: " << name << ", "
|
||||
// << view_tag_to_string(Tag{}) << ", "
|
||||
// << value_type_to_string(ValueType()) << ", " << std::endl;
|
||||
// }
|
||||
// since here we call the in-place operation, we need to use two views:
|
||||
// view1: filled according to what the scenario asks for and is not modified
|
||||
// view2: filled according to what the scenario asks for and used for the
|
||||
// in-place op Therefore, after the op is done, view_2 should contain the
|
||||
// result of doing exclusive scan NOTE: view2 is filled below every time
|
||||
// because the algorithm acts in place
|
||||
|
||||
auto view_dest = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
|
||||
auto view_from = create_view<ValueType>(Tag{}, view_ext, "inclusive_scan");
|
||||
fill_view(view_from, name);
|
||||
auto view1 =
|
||||
create_view<ValueType>(Tag{}, view_ext, "inclusive_scan_inplace_view1");
|
||||
fill_view(view1, name);
|
||||
|
||||
auto view2 =
|
||||
create_view<ValueType>(Tag{}, view_ext, "inclusive_scan_inplace_view2");
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan(exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest), bop,
|
||||
args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, bop, args...);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::inclusive_scan(exespace(), KE::cbegin(view2), KE::cend(view2),
|
||||
KE::begin(view2), args...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view_from),
|
||||
KE::cend(view_from), KE::begin(view_dest), bop,
|
||||
args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, bop, args...);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::inclusive_scan("label", exespace(), KE::cbegin(view2),
|
||||
KE::cend(view2), KE::begin(view2), args...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan(exespace(), view_from, view_dest, bop, args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, bop, args...);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::inclusive_scan(exespace(), view2, view2, args...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::inclusive_scan("label", exespace(), view_from, view_dest, bop,
|
||||
args...);
|
||||
ASSERT_EQ(r, KE::end(view_dest));
|
||||
verify_data(view_from, view_dest, bop, args...);
|
||||
fill_view(view2, name);
|
||||
auto r = KE::inclusive_scan("label", exespace(), view2, view2, args...);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
VerifyData()(view1, view2, args...);
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
@ -318,27 +318,35 @@ void run_inclusive_scan_all_scenarios() {
|
||||
{"medium-a", 313}, {"medium-b", 1103}, {"large", 10513}};
|
||||
|
||||
for (const auto& it : scenarios) {
|
||||
run_single_scenario_default_op<Tag, ValueType>(it);
|
||||
run_single_scenario<Tag, ValueType>(it);
|
||||
run_single_scenario_inplace<Tag, ValueType>(it);
|
||||
|
||||
#if !defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
// the sum custom op is always run
|
||||
using sum_binary_op = SumFunctor<ValueType>;
|
||||
sum_binary_op sbop;
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, sbop);
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{0});
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{1});
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{-2});
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, sbop, ValueType{3});
|
||||
run_single_scenario<Tag, ValueType>(it, sbop);
|
||||
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{0});
|
||||
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{1});
|
||||
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{-2});
|
||||
run_single_scenario<Tag, ValueType>(it, sbop, ValueType{3});
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, sbop, ValueType{0});
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, sbop, ValueType{-2});
|
||||
|
||||
// custom multiply only for small views to avoid overflows
|
||||
if (it.first == "small-a" || it.first == "small-b") {
|
||||
using mult_binary_op = MultiplyFunctor<ValueType>;
|
||||
mult_binary_op mbop;
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, mbop);
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{0});
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{1});
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{-2});
|
||||
run_single_scenario_custom_op<Tag, ValueType>(it, mbop, ValueType{3});
|
||||
run_single_scenario<Tag, ValueType>(it, mbop);
|
||||
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{0});
|
||||
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{1});
|
||||
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{-2});
|
||||
run_single_scenario<Tag, ValueType>(it, mbop, ValueType{3});
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, mbop);
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, mbop, ValueType{0});
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, mbop, ValueType{-2});
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -146,7 +146,7 @@ void run_single_scenario(const InfoType& scenario_info) {
|
||||
resultsA[3] = KE::is_sorted("label", exespace(), view);
|
||||
const auto allA = std::all_of(resultsA.cbegin(), resultsA.cend(),
|
||||
[=](bool v) { return v == gold; });
|
||||
EXPECT_TRUE(allA);
|
||||
EXPECT_TRUE(allA) << name << ", " << view_tag_to_string(Tag{});
|
||||
|
||||
#if !defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
CustomLessThanComparator<ValueType, ValueType> comp;
|
||||
@ -159,7 +159,7 @@ void run_single_scenario(const InfoType& scenario_info) {
|
||||
resultsB[3] = KE::is_sorted("label", exespace(), view, comp);
|
||||
const auto allB = std::all_of(resultsB.cbegin(), resultsB.cend(),
|
||||
[=](bool v) { return v == gold; });
|
||||
EXPECT_TRUE(allB);
|
||||
EXPECT_TRUE(allB) << name << ", " << view_tag_to_string(Tag{});
|
||||
#endif
|
||||
|
||||
Kokkos::fence();
|
||||
@ -173,9 +173,6 @@ void run_is_sorted_all_scenarios() {
|
||||
{"medium-a", 1003}, {"medium-b", 1003}, {"large-a", 101513},
|
||||
{"large-b", 101513}};
|
||||
|
||||
std::cout << "is_sorted: " << view_tag_to_string(Tag{})
|
||||
<< ", all overloads \n";
|
||||
|
||||
for (const auto& it : scenarios) {
|
||||
run_single_scenario<Tag, ValueType>(it);
|
||||
}
|
||||
|
||||
@ -145,10 +145,10 @@ void run_single_scenario(const InfoType& scenario_info) {
|
||||
KE::is_sorted_until("label", exespace(), KE::begin(view), KE::end(view));
|
||||
auto r3 = KE::is_sorted_until(exespace(), view);
|
||||
auto r4 = KE::is_sorted_until("label", exespace(), view);
|
||||
ASSERT_EQ(r1, gold);
|
||||
ASSERT_EQ(r2, gold);
|
||||
ASSERT_EQ(r3, gold);
|
||||
ASSERT_EQ(r4, gold);
|
||||
ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
ASSERT_EQ(r2, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
ASSERT_EQ(r3, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
ASSERT_EQ(r4, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
|
||||
#if !defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
CustomLessThanComparator<ValueType, ValueType> comp;
|
||||
@ -160,10 +160,10 @@ void run_single_scenario(const InfoType& scenario_info) {
|
||||
auto r8 = KE::is_sorted_until("label", exespace(), view, comp);
|
||||
#endif
|
||||
|
||||
ASSERT_EQ(r1, gold);
|
||||
ASSERT_EQ(r2, gold);
|
||||
ASSERT_EQ(r3, gold);
|
||||
ASSERT_EQ(r4, gold);
|
||||
ASSERT_EQ(r1, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
ASSERT_EQ(r2, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
ASSERT_EQ(r3, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
ASSERT_EQ(r4, gold) << name << ", " << view_tag_to_string(Tag{});
|
||||
|
||||
Kokkos::fence();
|
||||
}
|
||||
@ -176,9 +176,6 @@ void run_is_sorted_until_all_scenarios() {
|
||||
{"medium-a", 1003}, {"medium-b", 1003}, {"large-a", 101513},
|
||||
{"large-b", 101513}};
|
||||
|
||||
std::cout << "is_sorted_until: " << view_tag_to_string(Tag{})
|
||||
<< ", all overloads \n";
|
||||
|
||||
for (const auto& it : scenarios) {
|
||||
run_single_scenario<Tag, ValueType>(it);
|
||||
}
|
||||
|
||||
@ -48,7 +48,7 @@ struct MyMovableType {
|
||||
TEST(std_algorithms_mod_ops_test, move) {
|
||||
MyMovableType a;
|
||||
using move_t = decltype(std::move(a));
|
||||
static_assert(std::is_rvalue_reference<move_t>::value, "");
|
||||
static_assert(std::is_rvalue_reference<move_t>::value);
|
||||
|
||||
// move constr
|
||||
MyMovableType b(std::move(a));
|
||||
@ -70,7 +70,7 @@ struct StdAlgoModSeqOpsTestMove {
|
||||
void operator()(const int index) const {
|
||||
typename ViewType::value_type a{11};
|
||||
using move_t = decltype(std::move(a));
|
||||
static_assert(std::is_rvalue_reference<move_t>::value, "");
|
||||
static_assert(std::is_rvalue_reference<move_t>::value);
|
||||
m_view(index) = std::move(a);
|
||||
}
|
||||
|
||||
@ -89,50 +89,6 @@ TEST(std_algorithms_mod_ops_test, move_within_parfor) {
|
||||
}
|
||||
}
|
||||
|
||||
// ------------
|
||||
// swap
|
||||
// ------------
|
||||
TEST(std_algorithms_mod_ops_test, swap) {
|
||||
{
|
||||
int a = 1;
|
||||
int b = 2;
|
||||
KE::swap(a, b);
|
||||
ASSERT_EQ(a, 2);
|
||||
ASSERT_EQ(b, 1);
|
||||
}
|
||||
|
||||
{
|
||||
double a = 3.;
|
||||
double b = 1.;
|
||||
KE::swap(a, b);
|
||||
EXPECT_DOUBLE_EQ(a, 1.);
|
||||
EXPECT_DOUBLE_EQ(b, 3.);
|
||||
}
|
||||
}
|
||||
|
||||
template <class ViewType>
|
||||
struct StdAlgoModSeqOpsTestSwap {
|
||||
ViewType m_view;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int index) const {
|
||||
typename ViewType::value_type newval{11};
|
||||
KE::swap(m_view(index), newval);
|
||||
}
|
||||
|
||||
StdAlgoModSeqOpsTestSwap(ViewType aIn) : m_view(aIn) {}
|
||||
};
|
||||
|
||||
TEST(std_algorithms_mod_ops_test, swap_within_parfor) {
|
||||
auto a = create_view<double>(stdalgos::DynamicTag{}, 10, "a");
|
||||
StdAlgoModSeqOpsTestSwap<decltype(a)> fnc(a);
|
||||
Kokkos::parallel_for(a.extent(0), fnc);
|
||||
auto a_h = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), a);
|
||||
for (std::size_t i = 0; i < a.extent(0); ++i) {
|
||||
EXPECT_DOUBLE_EQ(a_h(0), 11.);
|
||||
}
|
||||
}
|
||||
|
||||
// ------------
|
||||
// iter_swap
|
||||
// ------------
|
||||
|
||||
@ -110,11 +110,9 @@ void verify_data(const std::string& name, ResultType my_result,
|
||||
ViewTypeDestFalse view_dest_false, PredType pred) {
|
||||
using value_type = typename ViewTypeFrom::value_type;
|
||||
static_assert(
|
||||
std::is_same<value_type, typename ViewTypeDestTrue::value_type>::value,
|
||||
"");
|
||||
std::is_same<value_type, typename ViewTypeDestTrue::value_type>::value);
|
||||
static_assert(
|
||||
std::is_same<value_type, typename ViewTypeDestFalse::value_type>::value,
|
||||
"");
|
||||
std::is_same<value_type, typename ViewTypeDestFalse::value_type>::value);
|
||||
|
||||
const std::size_t ext = view_from.extent(0);
|
||||
|
||||
|
||||
@ -166,6 +166,10 @@ void run_all_scenarios() {
|
||||
}
|
||||
|
||||
TEST(std_algorithms_copy_if_team_test, test) {
|
||||
// FIXME_OPENMPTARGET
|
||||
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
|
||||
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
|
||||
#endif
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned>();
|
||||
|
||||
@ -121,7 +121,9 @@ struct TestFunctorA {
|
||||
}
|
||||
};
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
struct InPlace {};
|
||||
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
/* description:
|
||||
use a rank-2 view randomly filled with values,
|
||||
@ -147,9 +149,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
using space_t = Kokkos::DefaultExecutionSpace;
|
||||
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
|
||||
// exclusive_scan returns an iterator so to verify that it is correct
|
||||
// each team stores the distance of the returned iterator from the beginning
|
||||
// of the interval that team operates on and then we check that these
|
||||
@ -168,12 +167,19 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
rand_pool pool(lowerBound * upperBound);
|
||||
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
|
||||
|
||||
// use CTAD for functor
|
||||
auto initValuesView =
|
||||
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
TestFunctorA fnc(sourceView, sourceView, distancesView,
|
||||
intraTeamSentinelView, initValuesView, binaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
} else {
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// run cpp-std kernel and check
|
||||
@ -223,11 +229,16 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
#undef exclusive_scan
|
||||
}
|
||||
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
} else {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
}
|
||||
}
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void run_all_scenarios() {
|
||||
for (int numTeams : teamSizesToTest) {
|
||||
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
|
||||
@ -236,16 +247,24 @@ void run_all_scenarios() {
|
||||
#else
|
||||
for (int apiId : {0, 1}) {
|
||||
#endif
|
||||
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
|
||||
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(std_algorithms_exclusive_scan_team_test, test) {
|
||||
// FIXME_OPENMPTARGET
|
||||
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
|
||||
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
|
||||
#endif
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned>();
|
||||
|
||||
run_all_scenarios<DynamicTag, double, InPlace>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
|
||||
}
|
||||
|
||||
} // namespace TeamExclusiveScan
|
||||
|
||||
@ -139,7 +139,9 @@ struct TestFunctorA {
|
||||
}
|
||||
};
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
struct InPlace {};
|
||||
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
/* description:
|
||||
use a rank-2 view randomly filled with values,
|
||||
@ -165,9 +167,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
using space_t = Kokkos::DefaultExecutionSpace;
|
||||
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
|
||||
// inclusive_scan returns an iterator so to verify that it is correct
|
||||
// each team stores the distance of the returned iterator from the beginning
|
||||
// of the interval that team operates on and then we check that these
|
||||
@ -186,12 +185,20 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
rand_pool pool(lowerBound * upperBound);
|
||||
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
|
||||
|
||||
// use CTAD for functor
|
||||
auto initValuesView =
|
||||
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
TestFunctorA fnc(sourceView, sourceView, distancesView,
|
||||
intraTeamSentinelView, initValuesView, binaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
} else {
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// run cpp-std kernel and check
|
||||
@ -251,25 +258,38 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
#undef inclusive_scan
|
||||
}
|
||||
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
} else {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
}
|
||||
}
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void run_all_scenarios() {
|
||||
for (int numTeams : teamSizesToTest) {
|
||||
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
|
||||
for (int apiId : {0, 1, 2, 3, 4, 5}) {
|
||||
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
|
||||
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(std_algorithms_inclusive_scan_team_test, test) {
|
||||
// FIXME_OPENMPTARGET
|
||||
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
|
||||
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
|
||||
#endif
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned>();
|
||||
|
||||
run_all_scenarios<DynamicTag, double, InPlace>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
|
||||
}
|
||||
|
||||
} // namespace TeamInclusiveScan
|
||||
|
||||
@ -212,6 +212,10 @@ void run_all_scenarios() {
|
||||
}
|
||||
|
||||
TEST(std_algorithms_remove_copy_team_test, test) {
|
||||
// FIXME_OPENMPTARGET
|
||||
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
|
||||
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
|
||||
#endif
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned>();
|
||||
|
||||
@ -168,6 +168,10 @@ void run_all_scenarios() {
|
||||
}
|
||||
|
||||
TEST(std_algorithms_remove_copy_if_team_test, test) {
|
||||
// FIXME_OPENMPTARGET
|
||||
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
|
||||
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
|
||||
#endif
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned>();
|
||||
|
||||
@ -108,7 +108,9 @@ struct TestFunctorA {
|
||||
}
|
||||
};
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
struct InPlace {};
|
||||
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
/* description:
|
||||
use a rank-2 view randomly filled with values,
|
||||
@ -134,9 +136,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
using space_t = Kokkos::DefaultExecutionSpace;
|
||||
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
|
||||
// tranform_exclusive_scan returns an iterator so to verify that it is correct
|
||||
// each team stores the distance of the returned iterator from the beginning
|
||||
// of the interval that team operates on and then we check that these
|
||||
@ -156,12 +155,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
rand_pool pool(lowerBound * upperBound);
|
||||
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
|
||||
|
||||
// use CTAD for functor
|
||||
auto initValuesView =
|
||||
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, unaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
TestFunctorA fnc(sourceView, sourceView, distancesView,
|
||||
intraTeamSentinelView, initValuesView, binaryOp, unaryOp,
|
||||
apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
} else {
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, unaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// run cpp-std kernel and check
|
||||
@ -200,16 +208,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
#undef transform_exclusive_scan
|
||||
}
|
||||
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
} else {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
}
|
||||
}
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void run_all_scenarios() {
|
||||
for (int numTeams : teamSizesToTest) {
|
||||
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
|
||||
for (int apiId : {0, 1}) {
|
||||
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
|
||||
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -219,6 +232,10 @@ TEST(std_algorithms_transform_exclusive_scan_team_test, test) {
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned>();
|
||||
|
||||
run_all_scenarios<DynamicTag, double, InPlace>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
|
||||
}
|
||||
|
||||
} // namespace TeamTransformExclusiveScan
|
||||
|
||||
@ -131,7 +131,9 @@ struct TestFunctorA {
|
||||
}
|
||||
};
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
struct InPlace {};
|
||||
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
/* description:
|
||||
use a rank-2 view randomly filled with values,
|
||||
@ -157,9 +159,6 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
using space_t = Kokkos::DefaultExecutionSpace;
|
||||
Kokkos::TeamPolicy<space_t> policy(numTeams, Kokkos::AUTO());
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
|
||||
// tranform_inclusive_scan returns an iterator so to verify that it is correct
|
||||
// each team stores the distance of the returned iterator from the beginning
|
||||
// of the interval that team operates on and then we check that these
|
||||
@ -179,12 +178,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
rand_pool pool(lowerBound * upperBound);
|
||||
Kokkos::fill_random(initValuesView_h, pool, lowerBound, upperBound);
|
||||
|
||||
// use CTAD for functor
|
||||
auto initValuesView =
|
||||
Kokkos::create_mirror_view_and_copy(space_t(), initValuesView_h);
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, unaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
|
||||
// create the destination view
|
||||
Kokkos::View<ValueType**> destView("destView", numTeams, numCols);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
TestFunctorA fnc(sourceView, sourceView, distancesView,
|
||||
intraTeamSentinelView, initValuesView, binaryOp, unaryOp,
|
||||
apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
} else {
|
||||
TestFunctorA fnc(sourceView, destView, distancesView, intraTeamSentinelView,
|
||||
initValuesView, binaryOp, unaryOp, apiId);
|
||||
Kokkos::parallel_for(policy, fnc);
|
||||
}
|
||||
|
||||
// -----------------------------------------------
|
||||
// run cpp-std kernel and check
|
||||
@ -236,16 +244,21 @@ void test_A(std::size_t numTeams, std::size_t numCols, int apiId) {
|
||||
}
|
||||
#undef transform_inclusive_scan
|
||||
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
if constexpr (std::is_same_v<InPlaceOrVoid, InPlace>) {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(sourceView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
} else {
|
||||
auto dataViewAfterOp_h = create_host_space_copy(destView);
|
||||
expect_equal_host_views(stdDestView, dataViewAfterOp_h);
|
||||
}
|
||||
}
|
||||
|
||||
template <class LayoutTag, class ValueType>
|
||||
template <class LayoutTag, class ValueType, class InPlaceOrVoid = void>
|
||||
void run_all_scenarios() {
|
||||
for (int numTeams : teamSizesToTest) {
|
||||
for (const auto& numCols : {0, 1, 2, 13, 101, 1444, 8153}) {
|
||||
for (int apiId : {0, 1, 2, 3}) {
|
||||
test_A<LayoutTag, ValueType>(numTeams, numCols, apiId);
|
||||
test_A<LayoutTag, ValueType, InPlaceOrVoid>(numTeams, numCols, apiId);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -255,6 +268,10 @@ TEST(std_algorithms_transform_inclusive_scan_team_test, test) {
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned>();
|
||||
|
||||
run_all_scenarios<DynamicTag, double, InPlace>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int, InPlace>();
|
||||
run_all_scenarios<StridedThreeRowsTag, unsigned, InPlace>();
|
||||
}
|
||||
|
||||
} // namespace TeamTransformInclusiveScan
|
||||
|
||||
@ -186,6 +186,10 @@ void run_all_scenarios() {
|
||||
}
|
||||
|
||||
TEST(std_algorithms_unique_copy_team_test, test) {
|
||||
// FIXME_OPENMPTARGET
|
||||
#if defined(KOKKOS_ENABLE_OPENMPTARGET) && defined(KOKKOS_ARCH_INTEL_GPU)
|
||||
GTEST_SKIP() << "the test is known to fail with OpenMPTarget on Intel GPUs";
|
||||
#endif
|
||||
run_all_scenarios<DynamicTag, int>();
|
||||
run_all_scenarios<StridedTwoRowsTag, int>();
|
||||
run_all_scenarios<StridedThreeRowsTag, int>();
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
#include <TestStdAlgorithmsCommon.hpp>
|
||||
#include <utility>
|
||||
#include <iomanip>
|
||||
|
||||
namespace Test {
|
||||
namespace stdalgos {
|
||||
@ -160,24 +161,15 @@ void verify_data(ViewType1 data_view, // contains data
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||
if (test_view_h.extent(0) > 0) {
|
||||
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
|
||||
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
|
||||
// << gold_h(i) << " " << test_view_h(i) << " "
|
||||
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
|
||||
|
||||
if (std::is_same<gold_view_value_type, int>::value) {
|
||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||
} else {
|
||||
const auto error = std::abs(gold_h(i) - test_view_h(i));
|
||||
if (error > 1e-10) {
|
||||
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
|
||||
<< " " << gold_h(i) << " " << test_view_h(i) << " "
|
||||
<< std::abs(gold_h(i) - test_view_h(i)) << std::endl;
|
||||
}
|
||||
EXPECT_LT(error, 1e-10);
|
||||
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
|
||||
<< static_cast<double>(test_view_h(i)) << " "
|
||||
<< static_cast<double>(gold_h(i));
|
||||
}
|
||||
}
|
||||
// std::cout << " last el: " << test_view_h(test_view_h.extent(0)-1) <<
|
||||
// std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -205,17 +197,13 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
|
||||
BinaryOp bop, UnaryOp uop) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
// std::cout << "transform_exclusive_scan custom op: " << name << ", "
|
||||
// << view_tag_to_string(Tag{}) << ", "
|
||||
// << value_type_to_string(ValueType()) << ", "
|
||||
// << "init = " << init_value << std::endl;
|
||||
|
||||
auto view_dest =
|
||||
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan");
|
||||
auto view_from =
|
||||
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan");
|
||||
auto view_from = create_view<ValueType>(Tag{}, view_ext,
|
||||
"transform_exclusive_scan_view_from");
|
||||
fill_view(view_from, name);
|
||||
|
||||
auto view_dest = create_view<ValueType>(Tag{}, view_ext,
|
||||
"transform_exclusive_scan_view_dest");
|
||||
{
|
||||
fill_zero(view_dest);
|
||||
auto r = KE::transform_exclusive_scan(
|
||||
@ -253,6 +241,65 @@ void run_single_scenario(const InfoType& scenario_info, ValueType init_value,
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType, class BinaryOp,
|
||||
class UnaryOp>
|
||||
void run_single_scenario_inplace(const InfoType& scenario_info,
|
||||
ValueType init_value, BinaryOp bop,
|
||||
UnaryOp uop) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
|
||||
// since here we call the in-place operation, we need to use two views:
|
||||
// view1: filled according to what the scenario asks for and is not modified
|
||||
// view2: filled according to what the scenario asks for and used for the
|
||||
// in-place op. Therefore, after the op is done, view2 should contain the
|
||||
// result of doing exclusive scan. NOTE: view2 is filled below every time
|
||||
// because the algorithm acts in place
|
||||
|
||||
auto view1 =
|
||||
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan_view1");
|
||||
fill_view(view1, name);
|
||||
|
||||
auto view2 =
|
||||
create_view<ValueType>(Tag{}, view_ext, "transform_exclusive_scan_view2");
|
||||
|
||||
{
|
||||
fill_view(view2, name);
|
||||
auto r = KE::transform_exclusive_scan(exespace(), KE::cbegin(view2),
|
||||
KE::cend(view2), KE::begin(view2),
|
||||
init_value, bop, uop);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
verify_data(view1, view2, init_value, bop, uop);
|
||||
}
|
||||
|
||||
{
|
||||
fill_view(view2, name);
|
||||
auto r = KE::transform_exclusive_scan(
|
||||
"label", exespace(), KE::cbegin(view2), KE::cend(view2),
|
||||
KE::begin(view2), init_value, bop, uop);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
verify_data(view1, view2, init_value, bop, uop);
|
||||
}
|
||||
|
||||
{
|
||||
fill_view(view2, name);
|
||||
auto r = KE::transform_exclusive_scan(exespace(), view2, view2, init_value,
|
||||
bop, uop);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
verify_data(view1, view2, init_value, bop, uop);
|
||||
}
|
||||
|
||||
{
|
||||
fill_view(view2, name);
|
||||
auto r = KE::transform_exclusive_scan("label", exespace(), view2, view2,
|
||||
init_value, bop, uop);
|
||||
ASSERT_EQ(r, KE::end(view2));
|
||||
verify_data(view1, view2, init_value, bop, uop);
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType>
|
||||
void run_all_scenarios() {
|
||||
const std::map<std::string, std::size_t> scenarios = {
|
||||
@ -267,6 +314,11 @@ void run_all_scenarios() {
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{1}, bop_t(), uop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{-2}, bop_t(), uop_t());
|
||||
run_single_scenario<Tag, ValueType>(it, ValueType{3}, bop_t(), uop_t());
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{0}, bop_t(),
|
||||
uop_t());
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, ValueType{-2}, bop_t(),
|
||||
uop_t());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -16,6 +16,7 @@
|
||||
|
||||
#include <TestStdAlgorithmsCommon.hpp>
|
||||
#include <utility>
|
||||
#include <iomanip>
|
||||
|
||||
namespace Test {
|
||||
namespace stdalgos {
|
||||
@ -172,24 +173,15 @@ void verify_data(ViewType1 data_view, // contains data
|
||||
create_mirror_view_and_copy(Kokkos::HostSpace(), test_view_dc);
|
||||
if (test_view_h.extent(0) > 0) {
|
||||
for (std::size_t i = 0; i < test_view_h.extent(0); ++i) {
|
||||
// std::cout << i << " " << std::setprecision(15) << data_view_h(i) << " "
|
||||
// << gold_h(i) << " " << test_view_h(i) << " "
|
||||
// << std::abs(gold_h(i) - test_view_h(i)) << std::endl;
|
||||
|
||||
if (std::is_same<gold_view_value_type, int>::value) {
|
||||
ASSERT_EQ(gold_h(i), test_view_h(i));
|
||||
} else {
|
||||
const auto error = std::abs(gold_h(i) - test_view_h(i));
|
||||
if (error > 1e-10) {
|
||||
std::cout << i << " " << std::setprecision(15) << data_view_h(i)
|
||||
<< " " << gold_h(i) << " " << test_view_h(i) << " "
|
||||
<< std::abs(gold_h(i) - test_view_h(i)) << std::endl;
|
||||
}
|
||||
EXPECT_LT(error, 1e-10);
|
||||
ASSERT_LT(error, 1e-10) << i << " " << std::setprecision(15) << error
|
||||
<< static_cast<double>(test_view_h(i)) << " "
|
||||
<< static_cast<double>(gold_h(i));
|
||||
}
|
||||
}
|
||||
// std::cout << " last el: " << test_view_h(test_view_h.extent(0)-1) <<
|
||||
// std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -210,30 +202,11 @@ struct SumBinaryFunctor {
|
||||
std::string value_type_to_string(int) { return "int"; }
|
||||
std::string value_type_to_string(double) { return "double"; }
|
||||
|
||||
template <class Tag, class BopT, class UopT>
|
||||
void print_scenario_details(const std::string& name, BopT bop, UopT uop) {
|
||||
(void)bop;
|
||||
(void)uop;
|
||||
std::cout << "transform_inclusive_scan: " << name << ", "
|
||||
<< view_tag_to_string(Tag{}) << std::endl;
|
||||
}
|
||||
|
||||
template <class Tag, class BopT, class UopT, class ValueType>
|
||||
void print_scenario_details(const std::string& name, BopT bop, UopT uop,
|
||||
ValueType init_value) {
|
||||
(void)bop;
|
||||
(void)uop;
|
||||
std::cout << "transform_inclusive_scan: " << name << ", "
|
||||
<< view_tag_to_string(Tag{}) << ", "
|
||||
<< "init = " << init_value << std::endl;
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType, class... Args>
|
||||
void run_single_scenario(const InfoType& scenario_info,
|
||||
Args... args /* by value on purpose*/) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
// print_scenario_details<Tag>(name, args...);
|
||||
|
||||
auto view_dest =
|
||||
create_view<ValueType>(Tag{}, view_ext, "transform_inclusive_scan");
|
||||
@ -278,6 +251,63 @@ void run_single_scenario(const InfoType& scenario_info,
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType, class InfoType, class... Args>
|
||||
void run_single_scenario_inplace(const InfoType& scenario_info,
|
||||
Args... args /* by value on purpose*/) {
|
||||
const auto name = std::get<0>(scenario_info);
|
||||
const std::size_t view_ext = std::get<1>(scenario_info);
|
||||
|
||||
// since here we call the in-place operation, we need to use two views:
|
||||
// view_1: filled according to scenario and is not modified
|
||||
// view_2: filled according to scenario and used for the in-place op
|
||||
// Therefore, after the op is done, view_2 should contain the
|
||||
// result of doing inclusive scan.
|
||||
// NOTE: view_2 must be filled before every call to the algorithm
|
||||
// because the algorithm acts in place
|
||||
|
||||
auto view_1 = create_view<ValueType>(Tag{}, view_ext,
|
||||
"transform_inclusive_scan_view_1");
|
||||
fill_view(view_1, name);
|
||||
|
||||
auto view_2 = create_view<ValueType>(Tag{}, view_ext,
|
||||
"transform_inclusive_scan_view_2");
|
||||
|
||||
{
|
||||
fill_view(view_2, name);
|
||||
auto r = KE::transform_inclusive_scan(exespace(), KE::cbegin(view_2),
|
||||
KE::cend(view_2), KE::begin(view_2),
|
||||
args...);
|
||||
ASSERT_EQ(r, KE::end(view_2));
|
||||
verify_data(view_1, view_2, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_view(view_2, name);
|
||||
auto r = KE::transform_inclusive_scan("label", exespace(),
|
||||
KE::cbegin(view_2), KE::cend(view_2),
|
||||
KE::begin(view_2), args...);
|
||||
ASSERT_EQ(r, KE::end(view_2));
|
||||
verify_data(view_1, view_2, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_view(view_2, name);
|
||||
auto r = KE::transform_inclusive_scan(exespace(), view_2, view_2, args...);
|
||||
ASSERT_EQ(r, KE::end(view_2));
|
||||
verify_data(view_1, view_2, args...);
|
||||
}
|
||||
|
||||
{
|
||||
fill_view(view_2, name);
|
||||
auto r = KE::transform_inclusive_scan("label", exespace(), view_2, view_2,
|
||||
args...);
|
||||
ASSERT_EQ(r, KE::end(view_2));
|
||||
verify_data(view_1, view_2, args...);
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
template <class Tag, class ValueType>
|
||||
void run_all_scenarios() {
|
||||
const std::map<std::string, std::size_t> scenarios = {
|
||||
@ -294,15 +324,23 @@ void run_all_scenarios() {
|
||||
run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{2});
|
||||
run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{-1});
|
||||
run_single_scenario<Tag, ValueType>(it, bop_t(), uop_t(), ValueType{-2});
|
||||
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t());
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t(),
|
||||
ValueType{0});
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t(),
|
||||
ValueType{2});
|
||||
run_single_scenario_inplace<Tag, ValueType>(it, bop_t(), uop_t(),
|
||||
ValueType{-2});
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined KOKKOS_ENABLE_OPENMPTARGET
|
||||
TEST(std_algorithms_numeric_ops_test, transform_inclusive_scan) {
|
||||
run_all_scenarios<DynamicTag, double>();
|
||||
// run_all_scenarios<StridedThreeTag, double>();
|
||||
// run_all_scenarios<DynamicTag, int>();
|
||||
// run_all_scenarios<StridedThreeTag, int>();
|
||||
run_all_scenarios<StridedThreeTag, double>();
|
||||
run_all_scenarios<DynamicTag, int>();
|
||||
run_all_scenarios<StridedThreeTag, int>();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -83,9 +83,6 @@ auto run_min_or_max_test(ViewType view, StdReducersTestEnumOrder enValue) {
|
||||
static_assert(std::is_same<ExeSpace, Kokkos::HostSpace>::value,
|
||||
"test is only enabled for HostSpace");
|
||||
|
||||
std::cout << "checking reduction with order: " << order_to_string(enValue)
|
||||
<< "\n";
|
||||
|
||||
using view_value_type = typename ViewType::value_type;
|
||||
using reducer_type = std::conditional_t<
|
||||
(flag == 0), Kokkos::MaxFirstLoc<view_value_type, IndexType, ExeSpace>,
|
||||
@ -132,18 +129,24 @@ TEST(std_algorithms_reducers, max_first_loc) {
|
||||
|
||||
const auto pair1 = run_min_or_max_test<0, hostspace, index_type>(
|
||||
view_h, StdReducersTestEnumOrder::LeftToRight);
|
||||
ASSERT_EQ(pair1.first, gold_value);
|
||||
ASSERT_EQ(pair1.second, gold_location);
|
||||
ASSERT_EQ(pair1.first, gold_value)
|
||||
<< order_to_string(StdReducersTestEnumOrder::LeftToRight);
|
||||
ASSERT_EQ(pair1.second, gold_location)
|
||||
<< order_to_string(StdReducersTestEnumOrder::LeftToRight);
|
||||
|
||||
const auto pair2 = run_min_or_max_test<0, hostspace, index_type>(
|
||||
view_h, StdReducersTestEnumOrder::RightToLeft);
|
||||
ASSERT_EQ(pair2.first, gold_value);
|
||||
ASSERT_EQ(pair2.second, gold_location);
|
||||
ASSERT_EQ(pair2.first, gold_value)
|
||||
<< order_to_string(StdReducersTestEnumOrder::RightToLeft);
|
||||
ASSERT_EQ(pair2.second, gold_location)
|
||||
<< order_to_string(StdReducersTestEnumOrder::RightToLeft);
|
||||
|
||||
const auto pair3 = run_min_or_max_test<0, hostspace, index_type>(
|
||||
view_h, StdReducersTestEnumOrder::Random);
|
||||
ASSERT_EQ(pair3.first, gold_value);
|
||||
ASSERT_EQ(pair3.second, gold_location);
|
||||
ASSERT_EQ(pair3.first, gold_value)
|
||||
<< order_to_string(StdReducersTestEnumOrder::Random);
|
||||
ASSERT_EQ(pair3.second, gold_location)
|
||||
<< order_to_string(StdReducersTestEnumOrder::Random);
|
||||
}
|
||||
|
||||
TEST(std_algorithms_reducers, min_first_loc) {
|
||||
@ -191,9 +194,6 @@ void run_min_max_test(ViewType view, StdReducersTestEnumOrder enValue,
|
||||
static_assert(std::is_same<ExeSpace, Kokkos::HostSpace>::value,
|
||||
"test is only enabled for HostSpace");
|
||||
|
||||
std::cout << "checking reduction with order: " << order_to_string(enValue)
|
||||
<< "\n";
|
||||
|
||||
using view_value_type = typename ViewType::value_type;
|
||||
using reducer_type =
|
||||
Kokkos::MinMaxFirstLastLoc<view_value_type, IndexType, ExeSpace>;
|
||||
@ -212,10 +212,10 @@ void run_min_max_test(ViewType view, StdReducersTestEnumOrder enValue,
|
||||
reduction_value_type{view(index), view(index), index, index});
|
||||
}
|
||||
|
||||
ASSERT_EQ(red_result.min_val, gold_values.first);
|
||||
ASSERT_EQ(red_result.max_val, gold_values.second);
|
||||
ASSERT_EQ(red_result.min_loc, gold_locs.first);
|
||||
ASSERT_EQ(red_result.max_loc, gold_locs.second);
|
||||
ASSERT_EQ(red_result.min_val, gold_values.first) << order_to_string(enValue);
|
||||
ASSERT_EQ(red_result.max_val, gold_values.second) << order_to_string(enValue);
|
||||
ASSERT_EQ(red_result.min_loc, gold_locs.first) << order_to_string(enValue);
|
||||
ASSERT_EQ(red_result.max_loc, gold_locs.second) << order_to_string(enValue);
|
||||
}
|
||||
|
||||
TEST(std_algorithms_reducers, min_max_first_last_loc) {
|
||||
|
||||
@ -1 +1,12 @@
|
||||
#FIXME_OPENMPTARGET - compiling in debug mode causes ICE.
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(atomic)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(gather)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(gups)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(launch_latency)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(stream)
|
||||
|
||||
#FIXME_OPENMPTARGET - These two benchmarks cause ICE. Commenting them for now but a deeper analysis on the cause and a possible fix will follow.
|
||||
IF(NOT Kokkos_ENABLE_OPENMPTARGET)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(policy_performance)
|
||||
KOKKOS_ADD_BENCHMARK_DIRECTORIES(bytes_and_flops)
|
||||
ENDIF()
|
||||
|
||||
4
lib/kokkos/benchmarks/atomic/CMakeLists.txt
Normal file
4
lib/kokkos/benchmarks/atomic/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
||||
KOKKOS_ADD_EXECUTABLE(
|
||||
atomic
|
||||
SOURCES main.cpp
|
||||
)
|
||||
4
lib/kokkos/benchmarks/bytes_and_flops/CMakeLists.txt
Normal file
4
lib/kokkos/benchmarks/bytes_and_flops/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
||||
KOKKOS_ADD_EXECUTABLE(
|
||||
bytes_and_flops
|
||||
SOURCES bench_double.cpp bench_float.cpp bench_int32_t.cpp bench_int64_t.cpp main.cpp
|
||||
)
|
||||
@ -37,22 +37,22 @@ struct RunStride {
|
||||
};
|
||||
|
||||
#define STRIDE 1
|
||||
#include <bench_stride.hpp>
|
||||
#include "bench_stride.hpp"
|
||||
#undef STRIDE
|
||||
#define STRIDE 2
|
||||
#include <bench_stride.hpp>
|
||||
#include "bench_stride.hpp"
|
||||
#undef STRIDE
|
||||
#define STRIDE 4
|
||||
#include <bench_stride.hpp>
|
||||
#include "bench_stride.hpp"
|
||||
#undef STRIDE
|
||||
#define STRIDE 8
|
||||
#include <bench_stride.hpp>
|
||||
#include "bench_stride.hpp"
|
||||
#undef STRIDE
|
||||
#define STRIDE 16
|
||||
#include <bench_stride.hpp>
|
||||
#include "bench_stride.hpp"
|
||||
#undef STRIDE
|
||||
#define STRIDE 32
|
||||
#include <bench_stride.hpp>
|
||||
#include "bench_stride.hpp"
|
||||
#undef STRIDE
|
||||
|
||||
template <class Scalar>
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#include <bench.hpp>
|
||||
#include "bench.hpp"
|
||||
|
||||
template void run_stride_unroll<double>(int N, int K, int R, int D, int U,
|
||||
int F, int T, int S, int B, int I);
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#include <bench.hpp>
|
||||
#include "bench.hpp"
|
||||
|
||||
template void run_stride_unroll<float>(int N, int K, int R, int D, int U, int F,
|
||||
int T, int S, int B, int I);
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#include <bench.hpp>
|
||||
#include "bench.hpp"
|
||||
|
||||
template void run_stride_unroll<int32_t>(int N, int K, int R, int D, int U,
|
||||
int F, int T, int S, int B, int I);
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#include <bench.hpp>
|
||||
#include "bench.hpp"
|
||||
|
||||
template void run_stride_unroll<int64_t>(int N, int K, int R, int D, int U,
|
||||
int F, int T, int S, int B, int I);
|
||||
|
||||
@ -15,28 +15,28 @@
|
||||
//@HEADER
|
||||
|
||||
#define UNROLL 1
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 2
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 3
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 4
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 5
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 6
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 7
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 8
|
||||
#include <bench_unroll_stride.hpp>
|
||||
#include "bench_unroll_stride.hpp"
|
||||
#undef UNROLL
|
||||
|
||||
template <class Scalar>
|
||||
|
||||
@ -26,7 +26,7 @@ struct Run<Scalar, UNROLL, STRIDE> {
|
||||
Kokkos::deep_copy(C, Scalar(3.5));
|
||||
|
||||
Kokkos::Timer timer;
|
||||
for (int i = 0; i < I; ++i) {
|
||||
for (int iter = 0; iter < I; ++iter) {
|
||||
Kokkos::parallel_for(
|
||||
"BenchmarkKernel",
|
||||
Kokkos::TeamPolicy<>(N, T).set_scratch_size(0, Kokkos::PerTeam(S)),
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Timer.hpp>
|
||||
#include <bench.hpp>
|
||||
#include "bench.hpp"
|
||||
#include <cstdlib>
|
||||
|
||||
extern template void run_stride_unroll<float>(int, int, int, int, int, int, int,
|
||||
@ -86,7 +86,7 @@ int main(int argc, char* argv[]) {
|
||||
printf("D must be one of 1,2,4,8,16,32\n");
|
||||
return 0;
|
||||
}
|
||||
if ((P < 1) && (P > 2)) {
|
||||
if ((P < 1) || (P > 4)) {
|
||||
printf("P must be one of 1,2,3,4\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
4
lib/kokkos/benchmarks/gather/CMakeLists.txt
Normal file
4
lib/kokkos/benchmarks/gather/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
||||
KOKKOS_ADD_EXECUTABLE(
|
||||
gather
|
||||
SOURCES main.cpp
|
||||
)
|
||||
@ -20,28 +20,28 @@ struct RunGather {
|
||||
};
|
||||
|
||||
#define UNROLL 1
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 2
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 3
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 4
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 5
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 6
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 7
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
#define UNROLL 8
|
||||
#include <gather_unroll.hpp>
|
||||
#include "gather_unroll.hpp"
|
||||
#undef UNROLL
|
||||
|
||||
template <class Scalar>
|
||||
|
||||
@ -138,7 +138,7 @@ struct RunGather<Scalar, UNROLL> {
|
||||
printf(
|
||||
"SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: "
|
||||
"%lf GGather/s: %lf\n",
|
||||
sizeof(Scalar) / 4, N, K, D, R, UNROLL, F, seconds,
|
||||
static_cast<int>(sizeof(Scalar) / 4), N, K, D, R, UNROLL, F, seconds,
|
||||
1.0 * bytes / seconds / 1024 / 1024 / 1024, 1.e-9 * flops / seconds,
|
||||
1.e-9 * gather_ops / seconds);
|
||||
}
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Timer.hpp>
|
||||
#include <gather.hpp>
|
||||
#include "gather.hpp"
|
||||
#include <cstdlib>
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
|
||||
4
lib/kokkos/benchmarks/launch_latency/CMakeLists.txt
Normal file
4
lib/kokkos/benchmarks/launch_latency/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
||||
KOKKOS_ADD_EXECUTABLE(
|
||||
launch_latency
|
||||
SOURCES launch_latency.cpp
|
||||
)
|
||||
283
lib/kokkos/benchmarks/launch_latency/launch_latency.cpp
Normal file
283
lib/kokkos/benchmarks/launch_latency/launch_latency.cpp
Normal file
@ -0,0 +1,283 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 4.0
|
||||
// Copyright (2022) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://kokkos.org/LICENSE for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
/*! \file launch_latency.cpp
|
||||
|
||||
Tests of parallel_for and parallel_reduce latency for different
|
||||
circumstances.
|
||||
|
||||
Three launch kinds are tested: parallel_for, parallel_reduce into scalar,
|
||||
and parallel_reduce into view
|
||||
|
||||
N controls how large the parallel loop is
|
||||
V controls how large the functor is
|
||||
M controls across how many launches the latency is averaged
|
||||
K controls how large the nested loop is (no larger than V)
|
||||
|
||||
For each launch kind,
|
||||
1. Avg functor dispatch latency: (time to do M launches) / M
|
||||
2. Avg functor completion throughput: (M launches + sync) / M
|
||||
3. Avg functor completion latency: (M (launch + sync)) / M
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
template <int V>
|
||||
struct TestFunctor {
|
||||
double values[V];
|
||||
Kokkos::View<double*> a;
|
||||
int K;
|
||||
TestFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
for (int j = 0; j < K; j++) a(i) += 1.0 * i * values[j];
|
||||
}
|
||||
};
|
||||
|
||||
template <int V>
|
||||
struct TestRFunctor {
|
||||
double values[V];
|
||||
Kokkos::View<double*> a;
|
||||
int K;
|
||||
TestRFunctor(Kokkos::View<double*> a_, int K_) : a(a_), K(K_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i, double& lsum) const {
|
||||
for (int j = 0; j < K; j++) a(i) += 1.0 * i * values[j];
|
||||
lsum += a(i);
|
||||
}
|
||||
};
|
||||
|
||||
// Selects which launch kinds run() measures. Every kind is enabled by default;
// main() clears a flag when the matching --no-parallel-* option is given.
struct Opts {
|
||||
// measure Kokkos::parallel_for launches
bool par_for = true;
|
||||
// measure Kokkos::parallel_reduce into a host scalar
bool par_reduce = true;
|
||||
// measure Kokkos::parallel_reduce into a Kokkos::View<double>
bool par_reduce_view = true;
|
||||
};
|
||||
|
||||
template <int V>
|
||||
void run(int N, int M, int K, const Opts& opts) {
|
||||
std::string l_no_fence, l_fence, l_red_no_fence, l_red_fence,
|
||||
l_red_view_no_fence, l_red_view_fence;
|
||||
{
|
||||
std::ostringstream ostream;
|
||||
ostream << "RunNoFence_" << N << "_" << K << std::endl;
|
||||
l_no_fence = ostream.str();
|
||||
}
|
||||
{
|
||||
std::ostringstream ostream;
|
||||
ostream << "RunFence_" << N << "_" << K << std::endl;
|
||||
l_fence = ostream.str();
|
||||
}
|
||||
{
|
||||
std::ostringstream ostream;
|
||||
ostream << "RunReduceNoFence_" << N << "_" << K << std::endl;
|
||||
l_red_no_fence = ostream.str();
|
||||
}
|
||||
{
|
||||
std::ostringstream ostream;
|
||||
ostream << "RunReduceFence_" << N << "_" << K << std::endl;
|
||||
l_red_fence = ostream.str();
|
||||
}
|
||||
{
|
||||
std::ostringstream ostream;
|
||||
ostream << "RunReduceViewNoFence_" << N << "_" << K << std::endl;
|
||||
l_red_view_no_fence = ostream.str();
|
||||
}
|
||||
{
|
||||
std::ostringstream ostream;
|
||||
ostream << "RunReduceViewFence_" << N << "_" << K << std::endl;
|
||||
l_red_view_fence = ostream.str();
|
||||
}
|
||||
|
||||
double result;
|
||||
Kokkos::View<double*> a("A", N);
|
||||
Kokkos::View<double> v_result("result");
|
||||
TestFunctor<V> f(a, K);
|
||||
TestRFunctor<V> rf(a, K);
|
||||
Kokkos::Timer timer;
|
||||
|
||||
// initialize to an obviously wrong value
|
||||
double time_no_fence = -1; // launch loop
|
||||
double time_no_fence_fenced = -1; // launch loop then fence
|
||||
double time_fence = -1; // launch&fence loop
|
||||
|
||||
double time_red_no_fence = -1;
|
||||
double time_red_no_fence_fenced = -1;
|
||||
double time_red_fence = -1;
|
||||
|
||||
double time_red_view_no_fence = -1;
|
||||
double time_red_view_no_fence_fenced = -1;
|
||||
double time_red_view_fence = -1;
|
||||
|
||||
if (opts.par_for) {
|
||||
// warmup
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
Kokkos::parallel_for(l_no_fence, N, f);
|
||||
}
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
for (int i = 0; i < M; i++) {
|
||||
Kokkos::parallel_for(l_no_fence, N, f);
|
||||
}
|
||||
time_no_fence = timer.seconds();
|
||||
Kokkos::fence();
|
||||
time_no_fence_fenced = timer.seconds();
|
||||
|
||||
timer.reset();
|
||||
for (int i = 0; i < M; i++) {
|
||||
Kokkos::parallel_for(l_fence, N, f);
|
||||
Kokkos::fence();
|
||||
}
|
||||
time_fence = timer.seconds();
|
||||
}
|
||||
|
||||
if (opts.par_reduce) {
|
||||
// warmup
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
Kokkos::parallel_reduce(l_red_no_fence, N, rf, result);
|
||||
}
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
for (int i = 0; i < M; i++) {
|
||||
Kokkos::parallel_reduce(l_red_no_fence, N, rf, result);
|
||||
}
|
||||
time_red_no_fence = timer.seconds();
|
||||
Kokkos::fence();
|
||||
time_red_no_fence_fenced = timer.seconds();
|
||||
|
||||
timer.reset();
|
||||
for (int i = 0; i < M; i++) {
|
||||
Kokkos::parallel_reduce(l_red_fence, N, rf, result);
|
||||
Kokkos::fence();
|
||||
}
|
||||
time_red_fence = timer.seconds();
|
||||
Kokkos::fence();
|
||||
}
|
||||
|
||||
if (opts.par_reduce_view) {
|
||||
// warmup
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
Kokkos::parallel_reduce(l_red_view_no_fence, N, rf, v_result);
|
||||
}
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
for (int i = 0; i < M; i++) {
|
||||
Kokkos::parallel_reduce(l_red_view_no_fence, N, rf, v_result);
|
||||
}
|
||||
time_red_view_no_fence = timer.seconds();
|
||||
Kokkos::fence();
|
||||
time_red_view_no_fence_fenced = timer.seconds();
|
||||
|
||||
timer.reset();
|
||||
for (int i = 0; i < M; i++) {
|
||||
Kokkos::parallel_reduce(l_red_view_fence, N, rf, v_result);
|
||||
Kokkos::fence();
|
||||
}
|
||||
time_red_view_fence = timer.seconds();
|
||||
Kokkos::fence();
|
||||
timer.reset();
|
||||
}
|
||||
|
||||
const double x = 1.e6 / M;
|
||||
printf("%i %i %i %i", N, V, K, M);
|
||||
if (opts.par_for) {
|
||||
printf(" parallel_for: %lf %lf ( %lf )", x * time_no_fence, x * time_fence,
|
||||
x * time_no_fence_fenced);
|
||||
}
|
||||
if (opts.par_reduce) {
|
||||
printf(" parallel_reduce: %lf %lf ( %lf )", x * time_red_no_fence,
|
||||
x * time_red_fence, x * time_red_no_fence_fenced);
|
||||
}
|
||||
if (opts.par_reduce_view) {
|
||||
printf(" parallel_reduce(view): %lf %lf ( %lf )",
|
||||
x * time_red_view_no_fence, x * time_red_view_fence,
|
||||
x * time_red_view_no_fence_fenced);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
int main(int argc, char* argv[]) {
|
||||
Kokkos::initialize(argc, argv);
|
||||
{
|
||||
int N = 10000;
|
||||
int M = 20;
|
||||
int K = 1;
|
||||
|
||||
Opts opts;
|
||||
|
||||
printf("==========================\n");
|
||||
printf("Kokkos Launch Latency Test\n");
|
||||
printf("==========================\n");
|
||||
printf("\n");
|
||||
printf("Usage: %s ARGUMENTS [OPTIONS...]\n\n", argv[0]);
|
||||
printf("Arguments: N M K\n");
|
||||
printf(" N: loop length\n");
|
||||
printf(" M: how many kernels to dispatch\n");
|
||||
printf(
|
||||
" K: nested loop length (capped by size of functor member array\n\n");
|
||||
printf("Options:\n");
|
||||
printf(" --no-parallel-for: skip parallel_for benchmark\n");
|
||||
printf(" --no-parallel-reduce: skip parallel_reduce benchmark\n");
|
||||
printf(
|
||||
" --no-parallel-reduce-view: skip parallel_reduce into view "
|
||||
"benchmark\n");
|
||||
printf("\n\n");
|
||||
printf(" Output V is the size of the functor member array\n");
|
||||
printf("\n\n");
|
||||
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
const std::string_view arg(argv[i]);
|
||||
|
||||
// anything that doesn't start with --
|
||||
if (arg.size() < 2 ||
|
||||
(arg.size() >= 2 && arg[0] != '-' && arg[1] != '-')) {
|
||||
if (i == 1)
|
||||
N = atoi(arg.data());
|
||||
else if (i == 2)
|
||||
M = atoi(arg.data());
|
||||
else if (i == 3)
|
||||
K = atoi(arg.data());
|
||||
else {
|
||||
throw std::runtime_error("unexpected argument!");
|
||||
}
|
||||
} else if (arg == "--no-parallel-for") {
|
||||
opts.par_for = false;
|
||||
} else if (arg == "--no-parallel-reduce") {
|
||||
opts.par_reduce = false;
|
||||
} else if (arg == "--no-parallel-reduce-view") {
|
||||
opts.par_reduce_view = false;
|
||||
} else {
|
||||
std::stringstream ss;
|
||||
ss << "unexpected argument \"" << arg << "\" at position " << i;
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
}
|
||||
|
||||
printf("N V K M time_no_fence time_fence (time_no_fence_fenced)\n");
|
||||
|
||||
/* A backend may have different launch strategies for functors of different
|
||||
* sizes: test a variety of functor sizes.*/
|
||||
run<1>(N, M, K <= 1 ? K : 1, opts);
|
||||
run<16>(N, M, K <= 16 ? K : 16, opts);
|
||||
run<200>(N, M, K <= 200 ? K : 200, opts);
|
||||
run<3000>(N, M, K <= 3000 ? K : 3000, opts);
|
||||
run<30000>(N, M, K <= 30000 ? K : 30000, opts);
|
||||
}
|
||||
Kokkos::finalize();
|
||||
}
|
||||
4
lib/kokkos/benchmarks/policy_performance/CMakeLists.txt
Normal file
4
lib/kokkos/benchmarks/policy_performance/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
||||
KOKKOS_ADD_EXECUTABLE(
|
||||
policy_performance
|
||||
SOURCES main.cpp
|
||||
)
|
||||
@ -106,8 +106,9 @@ int main(int argc, char* argv[]) {
|
||||
|
||||
Kokkos::parallel_reduce(
|
||||
"parallel_reduce warmup", Kokkos::TeamPolicy<>(10, 1),
|
||||
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type team,
|
||||
double& lval) { lval += 1; },
|
||||
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type&, double& lval) {
|
||||
lval += 1;
|
||||
},
|
||||
result);
|
||||
|
||||
using view_type_1d = Kokkos::View<double*, Kokkos::LayoutRight>;
|
||||
|
||||
@ -21,13 +21,13 @@ struct ParallelScanFunctor {
|
||||
using value_type = double;
|
||||
ViewType v;
|
||||
|
||||
ParallelScanFunctor(const ViewType& v_) : v(v_) {}
|
||||
explicit ParallelScanFunctor(const ViewType& v_) : v(v_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int idx, value_type& val, const bool& final) const {
|
||||
void operator()(const int idx, value_type& val, const bool& is_final) const {
|
||||
// inclusive scan
|
||||
val += v(idx);
|
||||
if (final) {
|
||||
if (is_final) {
|
||||
v(idx) = val;
|
||||
}
|
||||
}
|
||||
@ -109,7 +109,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
vector_result = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::ThreadVectorRange(team, vector_range),
|
||||
[&](const int vi, double& vval) { vval += 1; },
|
||||
[&](const int, double& vval) { vval += 1; },
|
||||
vector_result);
|
||||
}
|
||||
v2(idx, t) = vector_result;
|
||||
@ -128,7 +128,7 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
team_result = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t, double& lval) { lval += 1; }, team_result);
|
||||
[&](const int, double& lval) { lval += 1; }, team_result);
|
||||
}
|
||||
v1(idx) = team_result;
|
||||
// prevent compiler optimizing loop away
|
||||
@ -170,13 +170,13 @@ void test_policy(int team_range, int thread_range, int vector_range,
|
||||
for (int tr = 0; tr < thread_repeat; ++tr) {
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::TeamThreadRange(team, thread_range),
|
||||
[&](const int t, double& lval) {
|
||||
[&](const int, double& lval) {
|
||||
double vector_result = 0.0;
|
||||
for (int vr = 0; vr < inner_repeat; ++vr) {
|
||||
vector_result = 0.0;
|
||||
Kokkos::parallel_reduce(
|
||||
Kokkos::ThreadVectorRange(team, vector_range),
|
||||
[&](const int vi, double& vval) { vval += 1; },
|
||||
[&](const int, double& vval) { vval += 1; },
|
||||
vector_result);
|
||||
lval += vector_result;
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user