diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 8267a9bf91..0aa86efcfb 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -105,7 +105,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4) set(CMAKE_TUNE_DEFAULT "-xCOMMON-AVX512") else() - set(CMAKE_TUNE_DEFAULT "-xHost") + set(CMAKE_TUNE_DEFAULT "-xHost -fp-model fast=2 -no-prec-div -qoverride-limits -diag-disable=10441 -diag-disable=2196") endif() endif() endif() @@ -854,8 +854,11 @@ if(BUILD_SHARED_LIBS OR PKG_PYTHON) find_package(Python COMPONENTS Interpreter) endif() if(Python_EXECUTABLE) - file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python) - install(CODE "execute_process(COMMAND ${Python_EXECUTABLE} setup.py build -b ${CMAKE_BINARY_DIR}/python install --prefix=${CMAKE_INSTALL_PREFIX} --root=\$ENV{DESTDIR}/ WORKING_DIRECTORY ${LAMMPS_PYTHON_DIR})") + file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python/lib) + file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python/src) + file(COPY ${LAMMPS_SOURCE_DIR}/version.h DESTINATION ${CMAKE_BINARY_DIR}/python/src) + file(COPY ${LAMMPS_PYTHON_DIR}/README ${LAMMPS_PYTHON_DIR}/pyproject.toml ${LAMMPS_PYTHON_DIR}/setup.py ${LAMMPS_PYTHON_DIR}/lammps DESTINATION ${CMAKE_BINARY_DIR}/python/lib) + install(CODE "if(\"\$ENV{DESTDIR}\" STREQUAL \"\")\n execute_process(COMMAND ${Python_EXECUTABLE} -m pip install -v ${CMAKE_BINARY_DIR}/python/lib --prefix=${CMAKE_INSTALL_PREFIX})\n else()\n execute_process(COMMAND ${Python_EXECUTABLE} -m pip install -v ${CMAKE_BINARY_DIR}/python/lib --prefix=${CMAKE_INSTALL_PREFIX} --root=\$ENV{DESTDIR})\n endif()") endif() endif() diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 7f23a6f777..2117397494 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -47,8 +47,8 @@ if(DOWNLOAD_KOKKOS) list(APPEND KOKKOS_LIB_BUILD_ARGS 
"-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}") list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}") include(ExternalProject) - set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.6.01.tar.gz" CACHE STRING "URL for KOKKOS tarball") - set(KOKKOS_MD5 "0ec97fc0c356dd65bd2487defe81a7bf" CACHE STRING "MD5 checksum of KOKKOS tarball") + set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.7.00.tar.gz" CACHE STRING "URL for KOKKOS tarball") + set(KOKKOS_MD5 "84991eca9f066383abe119a5bc7a11c4" CACHE STRING "MD5 checksum of KOKKOS tarball") mark_as_advanced(KOKKOS_URL) mark_as_advanced(KOKKOS_MD5) ExternalProject_Add(kokkos_build @@ -72,7 +72,7 @@ if(DOWNLOAD_KOKKOS) add_dependencies(LAMMPS::KOKKOSCORE kokkos_build) add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build) elseif(EXTERNAL_KOKKOS) - find_package(Kokkos 3.6.01 REQUIRED CONFIG) + find_package(Kokkos 3.7.00 REQUIRED CONFIG) target_link_libraries(lammps PRIVATE Kokkos::kokkos) target_link_libraries(lmp PRIVATE Kokkos::kokkos) else() diff --git a/cmake/Modules/Packages/ML-PACE.cmake b/cmake/Modules/Packages/ML-PACE.cmake index c553809ff1..c82ba50d01 100644 --- a/cmake/Modules/Packages/ML-PACE.cmake +++ b/cmake/Modules/Packages/ML-PACE.cmake @@ -1,6 +1,6 @@ -set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2021.10.25.fix2.tar.gz" CACHE STRING "URL for PACE evaluator library sources") +set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2022.09.27.fix10Oct.tar.gz" CACHE STRING "URL for PACE evaluator library sources") -set(PACELIB_MD5 "32394d799bc282bb57696c78c456e64f" CACHE STRING "MD5 checksum of PACE evaluator library tarball") +set(PACELIB_MD5 "766cebcc0e5c4b8430c2f3cd202d9905" CACHE STRING "MD5 checksum of PACE evaluator library tarball") mark_as_advanced(PACELIB_URL) mark_as_advanced(PACELIB_MD5) @@ -15,23 +15,9 @@ execute_process( ) get_newest_file(${CMAKE_BINARY_DIR}/lammps-user-pace-* lib-pace) -# enforce 
building libyaml-cpp as static library and turn off optional features -set(YAML_BUILD_SHARED_LIBS OFF) -set(YAML_CPP_BUILD_CONTRIB OFF) -set(YAML_CPP_BUILD_TOOLS OFF) -add_subdirectory(${lib-pace}/yaml-cpp build-yaml-cpp) -set(YAML_CPP_INCLUDE_DIR ${lib-pace}/yaml-cpp/include) - -file(GLOB PACE_EVALUATOR_INCLUDE_DIR ${lib-pace}/ML-PACE) -file(GLOB PACE_EVALUATOR_SOURCES ${lib-pace}/ML-PACE/*.cpp) -list(FILTER PACE_EVALUATOR_SOURCES EXCLUDE REGEX pair_pace.cpp) - -add_library(pace STATIC ${PACE_EVALUATOR_SOURCES}) +add_subdirectory(${lib-pace} build-pace) set_target_properties(pace PROPERTIES CXX_EXTENSIONS ON OUTPUT_NAME lammps_pace${LAMMPS_MACHINE}) -target_include_directories(pace PUBLIC ${PACE_EVALUATOR_INCLUDE_DIR} ${YAML_CPP_INCLUDE_DIR}) - -target_link_libraries(pace PRIVATE yaml-cpp-pace) if(CMAKE_PROJECT_NAME STREQUAL "lammps") target_link_libraries(lammps PRIVATE pace) endif() diff --git a/cmake/presets/intel.cmake b/cmake/presets/intel.cmake index 9baf873608..d61ea20d78 100644 --- a/cmake/presets/intel.cmake +++ b/cmake/presets/intel.cmake @@ -1,4 +1,4 @@ -# preset that will enable Intel compilers with support for MPI and OpenMP (on Linux boxes) +# preset that will enable the classic Intel compilers with support for MPI and OpenMP (on Linux boxes) set(CMAKE_CXX_COMPILER "icpc" CACHE STRING "" FORCE) set(CMAKE_C_COMPILER "icc" CACHE STRING "" FORCE) @@ -18,11 +18,11 @@ set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE) unset(HAVE_OMP_H_INCLUDE CACHE) set(OpenMP_C "icc" CACHE STRING "" FORCE) -set(OpenMP_C_FLAGS "-qopenmp" CACHE STRING "" FORCE) +set(OpenMP_C_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE) set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE) set(OpenMP_CXX "icpc" CACHE STRING "" FORCE) -set(OpenMP_CXX_FLAGS "-qopenmp" CACHE STRING "" FORCE) +set(OpenMP_CXX_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE) set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE) -set(OpenMP_Fortran_FLAGS "-qopenmp" CACHE STRING "" FORCE) 
+set(OpenMP_Fortran_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE) set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE) diff --git a/cmake/presets/oneapi.cmake b/cmake/presets/oneapi.cmake index 403494c409..2aacf1a1f5 100644 --- a/cmake/presets/oneapi.cmake +++ b/cmake/presets/oneapi.cmake @@ -18,11 +18,11 @@ set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE) unset(HAVE_OMP_H_INCLUDE CACHE) set(OpenMP_C "icx" CACHE STRING "" FORCE) -set(OpenMP_C_FLAGS "-qopenmp" CACHE STRING "" FORCE) +set(OpenMP_C_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE) set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE) set(OpenMP_CXX "icpx" CACHE STRING "" FORCE) -set(OpenMP_CXX_FLAGS "-qopenmp" CACHE STRING "" FORCE) +set(OpenMP_CXX_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE) set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE) -set(OpenMP_Fortran_FLAGS "-qopenmp" CACHE STRING "" FORCE) +set(OpenMP_Fortran_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE) set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE) diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index 2535b9010f..c15af87dc6 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -483,6 +483,9 @@ They must be specified in uppercase. * - **Arch-ID** - **HOST or GPU** - **Description** + * - NATIVE + - HOST + - Local machine * - AMDAVX - HOST - AMD 64-bit x86 CPU (AVX 1) @@ -522,9 +525,21 @@ They must be specified in uppercase. 
* - BDW - HOST - Intel Broadwell Xeon E-class CPU (AVX 2 + transactional mem) + * - SKL + - HOST + - Intel Skylake Client CPU * - SKX - HOST - - Intel Sky Lake Xeon E-class HPC CPU (AVX512 + transactional mem) + - Intel Skylake Xeon Server CPU (AVX512) + * - ICL + - HOST + - Intel Ice Lake Client CPU (AVX512) + * - ICX + - HOST + - Intel Ice Lake Xeon Server CPU (AVX512) + * - SPR + - HOST + - Intel Sapphire Rapids Xeon Server CPU (AVX512) * - KNC - HOST - Intel Knights Corner Xeon Phi @@ -596,7 +611,10 @@ They must be specified in uppercase. - AMD GPU MI100 GFX908 * - VEGA90A - GPU - - AMD GPU + - AMD GPU MI200 GFX90A + * - INTEL_GEN + - GPU + - SPIR64-based devices, e.g. Intel GPUs, using JIT * - INTEL_DG1 - GPU - Intel Iris XeMAX GPU @@ -611,9 +629,12 @@ They must be specified in uppercase. - Intel GPU Gen12LP * - INTEL_XEHP - GPU - - Intel GPUs Xe-HP + - Intel GPU Xe-HP + * - INTEL_PVC + - GPU + - Intel GPU Ponte Vecchio -This list was last updated for version 3.5.0 of the Kokkos library. +This list was last updated for version 3.7.0 of the Kokkos library. .. tabs:: diff --git a/doc/src/Build_manual.rst b/doc/src/Build_manual.rst index a920688923..c71c536e10 100644 --- a/doc/src/Build_manual.rst +++ b/doc/src/Build_manual.rst @@ -216,7 +216,7 @@ be multiple tests run automatically: - A test that only standard, printable ASCII text characters are used. This runs the command ``env LC_ALL=C grep -n '[^ -~]' src/*.rst`` and thus prints all offending lines with filename and line number - prepended to the screen. Special characters like greek letters + prepended to the screen. 
Special characters like Greek letters (:math:`\alpha~~\sigma~~\epsilon`), super- or subscripts (:math:`x^2~~\mathrm{U}_{LJ}`), mathematical expressions (:math:`\frac{1}{2}\mathrm{N}~~x\to\infty`), or the Angstrom symbol diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst index dda23d4a41..e800df613a 100644 --- a/doc/src/Commands_pair.rst +++ b/doc/src/Commands_pair.rst @@ -236,6 +236,7 @@ OPT. * :doc:`oxrna2/xstk ` * :doc:`oxrna2/coaxstk ` * :doc:`pace (k) ` + * :doc:`pace/extrapolation ` * :doc:`peri/eps ` * :doc:`peri/lps (o) ` * :doc:`peri/pmb (o) ` diff --git a/doc/src/Fortran.rst b/doc/src/Fortran.rst index 77ab447c7c..28254e056c 100644 --- a/doc/src/Fortran.rst +++ b/doc/src/Fortran.rst @@ -1,16 +1,16 @@ The ``LIBLAMMPS`` Fortran Module ******************************** -The ``LIBLAMMPS`` module provides an interface to call LAMMPS from a -Fortran code. It is based on the LAMMPS C-library interface and -requires a Fortran 2003 compatible compiler to be compiled. It is +The ``LIBLAMMPS`` module provides an interface to call LAMMPS from Fortran. +It is based on the LAMMPS C library interface and +requires a Fortran 2003-compatible compiler to be compiled. It is designed to be self-contained and not require any support functions -written in C, C++, or Fortran. +written in C, C++, or Fortran other than those in the C library interface. While C libraries have a defined binary interface (ABI) and can thus be -used from multiple compiler versions from different vendors for as long +used from multiple compiler versions from different vendors as long as they are compatible with the hosting operating system, the same is -not true for Fortran codes. Thus the LAMMPS Fortran module needs to be +not true for Fortran programs. Thus, the LAMMPS Fortran module needs to be compiled alongside the code using it from the source code in ``fortran/lammps.f90``. When linking, you also need to :doc:`link to the LAMMPS library `. 
A typical command line @@ -18,23 +18,23 @@ for a simple program using the Fortran interface would be: .. code-block:: bash - mpifort -o testlib.x lammps.f90 testlib.f90 -L. -llammps + mpifort -o testlib.x lammps.f90 testlib.f90 -L. -llammps -Please note, that the MPI compiler wrapper is only required when the -calling the library from an MPI parallel code. Otherwise, using the +Please note that the MPI compiler wrapper is only required when +calling the library from an MPI-parallelized program. Otherwise, using the fortran compiler (gfortran, ifort, flang, etc.) will suffice. It may be -necessary to link to additional libraries depending on how LAMMPS was +necessary to link to additional libraries, depending on how LAMMPS was configured and whether the LAMMPS library :doc:`was compiled as a static -or shared library `. +or dynamic library `. If the LAMMPS library itself has been compiled with MPI support, the resulting executable will still be able to run LAMMPS in parallel with -``mpirun`` or equivalent. Please also note that the order of the source -files matters: the ``lammps.f90`` file needs to be compiled first, since -it provides the ``LIBLAMMPS`` module that is imported by the Fortran -code using the interface. A working example code can be found together -with equivalent examples in C and C++ in the ``examples/COUPLE/simple`` -folder of the LAMMPS distribution. +``mpirun``, ``mpiexec`` or equivalent. Please also note that the order +of the source files matters: the ``lammps.f90`` file needs to be +compiled first, since it provides the ``LIBLAMMPS`` module that is +imported by the Fortran code that uses the interface. A working example +can be found together with equivalent examples in C and C++ in the +``examples/COUPLE/simple`` folder of the LAMMPS distribution. .. versionadded:: 9Oct2020 @@ -49,7 +49,7 @@ folder of the LAMMPS distribution. .. note:: A contributed (and more complete!) 
Fortran interface that more - closely resembles the C-library interface is available in the + closely resembles the C library interface is available in the ``examples/COUPLE/fortran2`` folder. Please see the ``README`` file in that folder for more information about it and how to contact its author and maintainer. @@ -62,32 +62,31 @@ Creating or deleting a LAMMPS object With the Fortran interface, the creation of a :cpp:class:`LAMMPS ` instance is included in the constructor for creating the :f:func:`lammps` derived type. To import the definition of -that type and its type bound procedures, you need to add a ``USE -LIBLAMMPS`` statement. Internally it will call either +that type and its type-bound procedures, you need to add a ``USE LIBLAMMPS`` +statement. Internally, it will call either :cpp:func:`lammps_open_fortran` or :cpp:func:`lammps_open_no_mpi` from the C library API to create the class instance. All arguments are -optional and :cpp:func:`lammps_mpi_init` will be called automatically, +optional and :cpp:func:`lammps_mpi_init` will be called automatically if it is needed. Similarly, a possible call to :cpp:func:`lammps_mpi_finalize` is integrated into the :f:func:`close` function and triggered with the optional logical argument set to -``.true.``. Here is a simple example: +``.TRUE.``. Here is a simple example: .. code-block:: fortran PROGRAM testlib USE LIBLAMMPS ! include the LAMMPS library interface IMPLICIT NONE - TYPE(lammps) :: lmp ! derived type to hold LAMMPS instance - CHARACTER(len=*), PARAMETER :: args(3) = & - [ CHARACTER(len=12) :: 'liblammps', '-log', 'none' ] + TYPE(lammps) :: lmp ! derived type to hold LAMMPS instance + CHARACTER(LEN=*), PARAMETER :: args(3) = & + [ CHARACTER(LEN=12) :: 'liblammps', '-log', 'none' ] ! create a LAMMPS instance (and initialize MPI) lmp = lammps(args) ! get and print numerical version code PRINT*, 'LAMMPS Version: ', lmp%version() - ! delete LAMMPS instance (and shuts down MPI) - CALL lmp%close(.true.) - + ! 
delete LAMMPS instance (and shutdown MPI) + CALL lmp%close(.TRUE.) END PROGRAM testlib It is also possible to pass command line flags from Fortran to C/C++ and @@ -103,8 +102,8 @@ version of the previous example: PROGRAM testlib2 USE LIBLAMMPS ! include the LAMMPS library interface IMPLICIT NONE - TYPE(lammps) :: lmp ! derived type to hold LAMMPS instance - CHARACTER(len=128), ALLOCATABLE :: command_args(:) + TYPE(lammps) :: lmp ! derived type to hold LAMMPS instance + CHARACTER(LEN=128), ALLOCATABLE :: command_args(:) INTEGER :: i, argc ! copy command line flags to `command_args()` @@ -122,7 +121,6 @@ version of the previous example: ! delete LAMMPS instance (and shuts down MPI) CALL lmp%close(.TRUE.) DEALLOCATE(command_args) - END PROGRAM testlib2 -------------------- @@ -133,9 +131,9 @@ Executing LAMMPS commands Once a LAMMPS instance is created, it is possible to "drive" the LAMMPS simulation by telling LAMMPS to read commands from a file or to pass individual or multiple commands from strings or lists of strings. This -is done similarly to how it is implemented in the :doc:`C-library +is done similarly to how it is implemented in the :doc:`C library interface `. Before handing off the calls to the -C-library interface, the corresponding Fortran versions of the calls +C library interface, the corresponding Fortran versions of the calls (:f:func:`file`, :f:func:`command`, :f:func:`commands_list`, and :f:func:`commands_string`) have to make a copy of the strings passed as arguments so that they can be modified to be compatible with the @@ -159,9 +157,9 @@ Below is a small demonstration of the uses of the different functions: ! 
define 10 groups of 10 atoms each ALLOCATE(cmdlist(10)) DO i=1, 10 - WRITE(trimmed,'(I10)') 10*i - WRITE(cmdlist(i),'(A,I1,A,I10,A,A)') & - 'group g', i-1, ' id ', 10*(i-1)+1, ':', ADJUSTL(trimmed) + WRITE(trimmed,'(I10)') 10*i + WRITE(cmdlist(i),'(A,I1,A,I10,A,A)') & + 'group g', i-1, ' id ', 10*(i-1)+1, ':', ADJUSTL(trimmed) END DO CALL lmp%commands_list(cmdlist) ! run multiple commands from multi-line string @@ -171,7 +169,6 @@ Below is a small demonstration of the uses of the different functions: 'create_atoms 1 single 1.0 1.0 ${zpos}' CALL lmp%commands_string(cmds) CALL lmp%close(.TRUE.) - END PROGRAM testcmd --------------- @@ -179,13 +176,15 @@ Below is a small demonstration of the uses of the different functions: Accessing system properties *************************** -The C-library interface allows the :doc:`extraction of different kinds +The C library interface allows the :doc:`extraction of different kinds of information ` about the active simulation -instance and also - in some cases - to apply modifications to it. In -some cases, the C-library interface makes pointers to internal data -structures accessible, thus when accessing them from Fortran, special -care is needed to avoid data corruption and crashes. Thus please see -the documentation of the individual type bound procedures for details. +instance and also---in some cases---to apply modifications to it, and the +Fortran interface provides access to the same data using Fortran-style, +C-interoperable data types. In some cases, the Fortran library interface makes +pointers to internal LAMMPS data structures accessible; when accessing them +through the library interfaces, special care is needed to avoid data corruption +and crashes. Please see the documentation of the individual type-bound +procedures for details. Below is an example demonstrating some of the possible uses. @@ -194,35 +193,36 @@ Below is an example demonstrating some of the possible uses. 
PROGRAM testprop USE LIBLAMMPS USE, INTRINSIC :: ISO_C_BINDING, ONLY : c_double, c_int64_t - TYPE(lammps) :: lmp - INTEGER(kind=8) :: natoms - REAL(c_double), POINTER :: dt - INTEGER(c_int64_t), POINTER :: ntimestep - REAL(kind=8) :: pe, ke + USE, INTRINSIC :: ISO_FORTRAN_ENV, ONLY : OUTPUT_UNIT + TYPE(lammps) :: lmp + INTEGER(KIND=c_int64_t), POINTER :: natoms + REAL(KIND=c_double), POINTER :: dt + INTEGER(KIND=c_int64_t), POINTER :: ntimestep + REAL(KIND=c_double) :: pe, ke lmp = lammps() CALL lmp%file('in.sysinit') - natoms = INT(lmp%get_natoms(),8) - WRITE(6,'(A,I8,A)') 'Running a simulation with', natoms, ' atoms' - WRITE(6,'(I8,A,I8,A,I3,A)') lmp%extract_setting('nlocal'), ' local and', & - lmp%extract_setting('nghost'), ' ghost atom. ', & + natoms = lmp%extract_global('natoms') + WRITE(OUTPUT_UNIT,'(A,I0,A)') 'Running a simulation with ', natoms, ' atoms' + WRITE(OUTPUT_UNIT,'(I0,A,I0,A,I0,A)') lmp%extract_setting('nlocal'), & + ' local and ', lmp%extract_setting('nghost'), ' ghost atoms. ', & lmp%extract_setting('ntypes'), ' atom types' CALL lmp%command('run 2 post no') dt = lmp%extract_global('dt') ntimestep = lmp%extract_global('ntimestep') - WRITE(6,'(A,I4,A,F4.1,A)') 'At step:', ntimestep, ' Changing timestep from', dt, ' to 0.5' - dt = 0.5 + WRITE(OUTPUT_UNIT,'(A,I0,A,F4.1,A)') 'At step: ', ntimestep, & + ' Changing timestep from', dt, ' to 0.5' + dt = 0.5_c_double CALL lmp%command('run 2 post no') - WRITE(6,'(A,I4)') 'At step:', ntimestep + WRITE(OUTPUT_UNIT,'(A,I0)') 'At step: ', ntimestep pe = lmp%get_thermo('pe') ke = lmp%get_thermo('ke') PRINT*, 'PE = ', pe PRINT*, 'KE = ', ke CALL lmp%close(.TRUE.) - END PROGRAM testprop --------------- @@ -240,9 +240,10 @@ of the contents of the ``LIBLAMMPS`` Fortran interface to LAMMPS. class instance that any of the included calls are forwarded to. 
:f c_ptr handle: reference to the LAMMPS class + :f type(lammps_style) style: derived type to access lammps style constants + :f type(lammps_type) type: derived type to access lammps type constants :f subroutine close: :f:func:`close` :f subroutine error: :f:func:`error` - :f function version: :f:func:`version` :f subroutine file: :f:func:`file` :f subroutine command: :f:func:`command` :f subroutine commands_list: :f:func:`commands_list` @@ -252,8 +253,23 @@ of the contents of the ``LIBLAMMPS`` Fortran interface to LAMMPS. :f subroutine extract_box: :f:func:`extract_box` :f subroutine reset_box: :f:func:`reset_box` :f subroutine memory_usage: :f:func:`memory_usage` + :f function get_mpi_comm: :f:func:`get_mpi_comm` :f function extract_setting: :f:func:`extract_setting` :f function extract_global: :f:func:`extract_global` + :f function extract_atom: :f:func:`extract_atom` + :f function extract_compute: :f:func:`extract_compute` + :f function extract_fix: :f:func:`extract_fix` + :f function extract_variable: :f:func:`extract_variable` + :f subroutine gather_atoms: :f:func:`gather_atoms` + :f subroutine gather_atoms_concat: :f:func:`gather_atoms_concat` + :f subroutine gather_atoms_subset: :f:func:`gather_atoms_subset` + :f subroutine scatter_atoms: :f:func:`scatter_atoms` + :f subroutine scatter_atoms_subset: :f:func:`scatter_atoms_subset` + :f function version: :f:func:`version` + :f subroutine flush_buffers: :f:func:`flush_buffers` + :f function is_running: :f:func:`is_running` + :f function has_error: :f:func:`has_error` + :f subroutine get_last_error_message: :f:func:`get_last_error_message` -------- @@ -286,12 +302,30 @@ of the contents of the ``LIBLAMMPS`` Fortran interface to LAMMPS. .. code-block:: Fortran PROGRAM testmpi - USE LIBLAMMPS - USE MPI_F08 - TYPE(lammps) :: lmp - lmp = lammps(MPI_COMM_SELF%MPI_VAL) + USE LIBLAMMPS + USE MPI_F08 + TYPE(lammps) :: lmp + lmp = lammps(MPI_COMM_SELF%MPI_VAL) END PROGRAM testmpi +.. 
f:type:: lammps_style + + This derived type is there to provide a convenient interface for the style + constants used with :f:func:`extract_compute`, :f:func:`extract_fix`, and + :f:func:`extract_variable`. Assuming your LAMMPS instance is called ``lmp``, + these constants will be ``lmp%style%global``, ``lmp%style%atom``, + and ``lmp%style%local``. These values are identical to the values described + in :cpp:enum:`_LMP_STYLE_CONST` for the C library interface. + +.. f:type:: lammps_type + + This derived type is there to provide a convenient interface for the type + constants used with :f:func:`extract_compute`, :f:func:`extract_fix`, and + :f:func:`extract_variable`. Assuming your LAMMPS instance is called ``lmp``, + these constants will be ``lmp%type%scalar``, ``lmp%type%vector``, and + ``lmp%type%array``. These values are identical to the values described + in :cpp:enum:`_LMP_TYPE_CONST` for the C library interface. + Procedures Bound to the lammps Derived Type =========================================== @@ -299,17 +333,18 @@ Procedures Bound to the lammps Derived Type This method will close down the LAMMPS instance through calling :cpp:func:`lammps_close`. If the *finalize* argument is present and - has a value of ``.true.``, then this subroutine also calls + has a value of ``.TRUE.``, then this subroutine also calls :cpp:func:`lammps_mpi_finalize`. - :o logical finalize [optional]: shut down the MPI environment of the LAMMPS library if true. + :o logical finalize [optional]: shut down the MPI environment of the LAMMPS + library if ``.TRUE.``. -------- .. f:subroutine:: error(error_type, error_text) - This method is a wrapper around the :cpp:func:`lammps_error` function and will dispatch - an error through the LAMMPS Error class. + This method is a wrapper around the :cpp:func:`lammps_error` function and + will dispatch an error through the LAMMPS Error class. .. versionadded:: TBD @@ -318,14 +353,6 @@ Procedures Bound to the lammps Derived Type -------- -.. 
f:function:: version() - - This method returns the numeric LAMMPS version like :cpp:func:`lammps_version` - - :r integer: LAMMPS version - --------- - .. f:subroutine:: file(filename) This method will call :cpp:func:`lammps_file` to have LAMMPS read @@ -369,6 +396,12 @@ Procedures Bound to the lammps Derived Type :r real(c_double): number of atoms + .. note:: + + If you would prefer to get the number of atoms in its native format + (i.e., as a 32- or 64-bit integer, depending on how LAMMPS was compiled), + this can be extracted with :f:func:`extract_global`. + -------- .. f:function:: get_thermo(name) @@ -471,7 +504,7 @@ Procedures Bound to the lammps Derived Type .. note:: - The `MPI_F08` module, which defines Fortran 2008 bindings for MPI, + The ``MPI_F08`` module, which defines Fortran 2008 bindings for MPI, is not directly supported by this function. However, you should be able to convert between the two using the `MPI_VAL` member of the communicator. For example, @@ -480,12 +513,12 @@ Procedures Bound to the lammps Derived Type USE MPI_F08 USE LIBLAMMPS - TYPE (LAMMPS) :: lmp + TYPE (lammps) :: lmp TYPE (MPI_Comm) :: comm ! ... [commands to set up LAMMPS/etc.] comm%MPI_VAL = lmp%get_mpi_comm() - should assign an `MPI_F08` communicator properly. + should assign an ``MPI_F08`` communicator properly. -------- @@ -528,21 +561,21 @@ Procedures Bound to the lammps Derived Type .. code-block:: fortran PROGRAM demo - USE, INTRINSIC :: ISO_C_BINDING, ONLY : c_int64_t - USE LIBLAMMPS - TYPE(lammps) :: lmp - INTEGER(c_int), POINTER :: nlocal - INTEGER(c_int64_t), POINTER :: ntimestep - CHARACTER(LEN=10) :: units - REAL(c_double), POINTER :: dt - lmp = lammps() - ! other commands - nlocal = lmp%extract_global('nlocal') - ntimestep = lmp%extract_global('ntimestep') - dt = lmp%extract_global('dt') - units = lmp%extract_global('units') - ! more commands - lmp.close(.TRUE.) 
+ USE, INTRINSIC :: ISO_C_BINDING, ONLY : c_int64_t, c_int, c_double + USE LIBLAMMPS + TYPE(lammps) :: lmp + INTEGER(c_int), POINTER :: nlocal => NULL() + INTEGER(c_int64_t), POINTER :: ntimestep => NULL() + REAL(c_double), POINTER :: dt => NULL() + CHARACTER(LEN=10) :: units + lmp = lammps() + ! other commands + nlocal = lmp%extract_global('nlocal') + ntimestep = lmp%extract_global('ntimestep') + dt = lmp%extract_global('dt') + units = lmp%extract_global('units') + ! more commands + CALL lmp%close(.TRUE.) END PROGRAM demo would extract the number of atoms on this processor, the current time step, @@ -551,23 +584,957 @@ Procedures Bound to the lammps Derived Type .. note:: - if this function returns a string, the string must have - length greater than or equal to the length of the string (not including the - terminal NULL character) that LAMMPS returns. If the variable's length is - too short, the string will be truncated. As usual in Fortran, strings - are padded with spaces at the end. + If :f:func:`extract_global` returns a string, the string must have length + greater than or equal to the length of the string (not including the + terminal ``NULL`` character) that LAMMPS returns. If the variable's + length is too short, the string will be truncated. As usual in Fortran, + strings are padded with spaces at the end. If you use an allocatable + string, the string **must be allocated** prior to calling this function, + but you can automatically reallocate it to the correct length after the + function returns, viz., - :p character(len=\*) name: string with the name of the extracted property + .. code-block :: Fortran + + PROGRAM test + USE LIBLAMMPS + TYPE(lammps) :: lmp + CHARACTER(LEN=:), ALLOCATABLE :: str + lmp = lammps() + CALL lmp%command('units metal') + ALLOCATE ( CHARACTER(LEN=80) :: str ) + str = lmp%extract_global('units') + str = TRIM(str) ! 
re-allocates to length len_trim(str) here + PRINT*, LEN(str), LEN_TRIM(str) + END PROGRAM test + + will print the number 5 (the length of the word "metal") twice. + + :p character(len=\*) name: string with the name of the property to extract :r polymorphic: pointer to LAMMPS data. The left-hand side of the assignment should be either a string (if expecting string data) or a C-compatible pointer (e.g., ``INTEGER (c_int), POINTER :: nlocal``) to the extracted property. If expecting vector data, the pointer should have dimension ":". -.. warning:: + .. warning:: - Modifying the data in the location pointed to by the returned pointer - may lead to inconsistent internal data and thus may cause failures or - crashes or bogus simulations. In general it is thus usually better - to use a LAMMPS input command that sets or changes these parameters. - Those will take care of all side effects and necessary updates of - settings derived from such settings. + Modifying the data in the location pointed to by the returned pointer + may lead to inconsistent internal data and thus may cause failures, + crashes, or bogus simulations. In general, it is much better + to use a LAMMPS input command that sets or changes these parameters. + Using an input command will take care of all side effects and necessary + updates of settings derived from such settings. + +-------- + +.. f:function:: extract_atom(name) + + This function calls :c:func:`lammps_extract_atom` and returns a pointer to + LAMMPS data tied to the :cpp:class:`Atom` class, depending on the data + requested through *name*. + + .. versionadded:: TBD + + Note that this function actually does not return a pointer, but rather + associates the pointer on the left side of the assignment to point + to internal LAMMPS data. 
Pointers must be of the correct type, kind, and + rank (e.g., ``INTEGER(c_int), DIMENSION(:)`` for "type", "mask", or "tag"; + ``INTEGER(c_int64_t), DIMENSION(:)`` for "tag" if LAMMPS was compiled + with the ``-DLAMMPS_BIGBIG`` flag; ``REAL(c_double), DIMENSION(:,:)`` for + "x", "v", or "f"; and so forth). The pointer being associated with LAMMPS + data is type-, kind-, and rank-checked at run-time. Pointers returned by + this function are generally persistent; therefore, it is not necessary to + call the function again unless the underlying LAMMPS data are destroyed, + such as through the :doc:`clear` command. + + :p character(len=\*) name: string with the name of the property to extract + :r polymorphic: pointer to LAMMPS data. The left-hand side of the assignment + should be a C-interoperable pointer of appropriate kind and rank + (e.g., ``INTEGER (c_int), POINTER :: mask(:)``) to the extracted + property. If expecting vector data, the pointer should have dimension ":"; + if expecting matrix data, the pointer should have dimension ":,:". + + .. admonition:: Array index order + + Two-dimensional arrays returned from :f:func:`extract_atom` will be + **transposed** from equivalent arrays in C, and they will be indexed + from 1 instead of 0. For example, in C, + + .. code-block:: C + + void *lmp; + double **x; + /* more code to setup, etc. */ + x = lammps_extract_atom(lmp, "x"); + printf("%f\n", x[5][1]); + + will print the *y*-coordinate of the sixth atom on this processor. + Conversely, + + .. code-block:: Fortran + + TYPE(lammps) :: lmp + REAL(c_double), DIMENSION(:,:), POINTER :: x => NULL() + ! more code to setup, etc. + x = lmp%extract_atom("x") + print '(f0.6)', x(2,6) + + will print the *y*-coordinate of the sixth atom on this processor + (note the transposition of the two indices). This is not a choice, but + rather a consequence of the different conventions adopted by the Fortran + and C standards decades ago: in C, the block of data + + .. 
parsed-literal:: + + 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + + interpreted as a :math:`4\times4` matrix would be + + .. math:: + + \begin{bmatrix} + 1 & 2 & 3 & 4 \\ + 5 & 6 & 7 & 8 \\ + 9 & 10 & 11 & 12 \\ + 13 & 14 & 15 & 16 + \end{bmatrix}, + + that is, in row-major order. In Fortran, the same block of data is + interpreted in column-major order, namely, + + .. math:: + + \begin{bmatrix} + 1 & 5 & 9 & 13 \\ + 2 & 6 & 10 & 14 \\ + 3 & 7 & 11 & 15 \\ + 4 & 8 & 12 & 16 + \end{bmatrix}. + + This difference in interpretation of the same block of data by the two + languages means, in effect, that matrices from C or C++ will be + transposed when interpreted in Fortran. + + .. note:: + + If you would like the indices to start at 0 instead of 1 (which follows + typical notation in C and C++, but not Fortran), you can create another + pointer and associate it thus: + + .. code-block:: Fortran + + REAL(c_double), DIMENSION(:,:), POINTER :: x, x0 + x = lmp%extract_atom("x") + x0(0:,0:) => x + + The above would cause the dimensions of *x* to be (1:3, 1:nmax) + and those of *x0* to be (0:2, 0:nmax-1). + +-------- + +.. f:function:: extract_compute(id, style, type) + + This function calls :c:func:`lammps_extract_compute` and returns a pointer + to LAMMPS data tied to the :cpp:class:`Compute` class, specifically data + provided by the compute identified by *id*. Computes may provide global, + per-atom, or local data, and those data may be a scalar, a vector, or an + array. Since computes may provide multiple kinds of data, the user is + required to specify which set of data is to be returned through the + *style* and *type* variables. + + .. versionadded:: TBD + + Note that this function actually does not return a value, but rather + associates the pointer on the left side of the assignment to point to + internal LAMMPS data. Pointers must be of the correct data type to point to + said data (i.e., ``REAL(c_double)``) and have compatible rank. 
The pointer + being associated with LAMMPS data is type-, kind-, and rank-checked at + run-time via an overloaded assignment operator. + + For example, + + .. code-block:: Fortran + + TYPE(lammps) :: lmp + REAL(c_double), DIMENSION(:), POINTER :: COM + ! code to setup, create atoms, etc. + CALL lmp%command('compute COM all com') + COM = lmp%extract_compute('COM', lmp%style%global, lmp%type%vector) + + will bind the variable *COM* to the center of mass of the atoms created in + your simulation. The vector in this case has length 3; the length (or, in + the case of array data, the number of rows and columns) is determined for + you based on data from the :cpp:class:`Compute` class. + + .. admonition:: Array index order + + Two-dimensional arrays returned from :f:func:`extract_compute` will be + **transposed** from equivalent arrays in C, and they will be indexed + from 1 instead of 0. See the note at :f:func:`extract_atom` for + further details. + + The following combinations are possible (assuming ``lmp`` is the name of + your LAMMPS instance): + + .. 
list-table:: + :header-rows: 1 + :widths: auto + + * - Style + - Type + - Type to assign to + - Returned data + * - ``lmp%style%global`` + - ``lmp%type%scalar`` + - ``REAL(c_double), POINTER`` + - Global scalar + * - ``lmp%style%global`` + - ``lmp%type%vector`` + - ``REAL(c_double), DIMENSION(:), POINTER`` + - Global vector + * - ``lmp%style%global`` + - ``lmp%type%array`` + - ``REAL(c_double), DIMENSION(:,:), POINTER`` + - Global array + * - ``lmp%style%atom`` + - ``lmp%type%vector`` + - ``REAL(c_double), DIMENSION(:), POINTER`` + - Per-atom vector + * - ``lmp%style%atom`` + - ``lmp%type%array`` + - ``REAL(c_double), DIMENSION(:,:), POINTER`` + - Per-atom array + * - ``lmp%style%local`` + - ``lmp%type%vector`` + - ``REAL(c_double), DIMENSION(:), POINTER`` + - Local vector + * - ``lmp%style%local`` + - ``lmp%type%array`` + - ``REAL(c_double), DIMENSION(:,:), POINTER`` + - Local array + + :p character(len=\*) id: compute ID from which to extract data + :p integer(c_int) style: value indicating the style of data to extract + (global, per-atom, or local) + :p integer(c_int) type: value indicating the type of data to extract + (scalar, vector, or array) + :r polymorphic: pointer to LAMMPS data. The left-hand side of the assignment + should be a C-compatible pointer (e.g., ``REAL (c_double), POINTER :: x``) + to the extracted property. If expecting vector data, the pointer should + have dimension ":"; if expecting array (matrix) data, the pointer should + have dimension ":,:". + + .. note:: + + If the compute's data are not already computed for the current step, the + compute will be invoked. LAMMPS cannot easily check at that time if it is + valid to invoke a compute, so it may fail with an error. The caller has + to check to avoid such an error. + + .. warning:: + + The pointers returned by this function are generally not persistent, + since the computed data may be re-distributed, re-allocated, and + re-ordered at every invocation. 
It is advisable to re-invoke this + function before the data are accessed or make a copy if the data are to + be used after other LAMMPS commands have been issued. Do **not** modify + the data returned by this function. + +-------- + +.. f:function:: extract_fix(id, style, type[, nrow][, ncol]) + + This function calls :c:func:`lammps_extract_fix` and returns a pointer to + LAMMPS data tied to the :cpp:class:`Fix` class, specifically data provided + by the fix identified by *id*. Fixes may provide global, per-atom, or + local data, and those data may be a scalar, a vector, or an array. Since + many fixes provide multiple kinds of data, the user is required to specify + which set of data is to be returned through the *style* and *type* + variables. + + .. versionadded:: TBD + + Global data are calculated at the time they are requested and are only + available element-by-element. As such, the user is expected to provide + the *nrow* variable to specify which element of a global vector or the + *nrow* and *ncol* variables to specify which element of a global array the + user wishes LAMMPS to return. The *ncol* variable is optional for global + scalar or vector data, and both *nrow* and *ncol* are optional when a + global scalar is requested, as well as when per-atom or local data are + requested. The following combinations are possible (assuming ``lmp`` is the + name of your LAMMPS instance): + + .. 
list-table:: + :header-rows: 1 + :widths: auto + + * - Style + - Type + - nrow + - ncol + - Type to assign to + - Returned data + * - ``lmp%style%global`` + - ``lmp%type%scalar`` + - Ignored + - Ignored + - ``REAL(c_double)`` + - Global scalar + * - ``lmp%style%global`` + - ``lmp%type%vector`` + - Required + - Ignored + - ``REAL(c_double)`` + - Element of global vector + * - ``lmp%style%global`` + - ``lmp%type%array`` + - Required + - Required + - ``REAL(c_double)`` + - Element of global array + * - ``lmp%style%atom`` + - ``lmp%type%scalar`` + - + - + - + - (not allowed) + * - ``lmp%style%atom`` + - ``lmp%type%vector`` + - Ignored + - Ignored + - ``REAL(c_double), DIMENSION(:), POINTER`` + - Per-atom vector + * - ``lmp%style%atom`` + - ``lmp%type%array`` + - Ignored + - Ignored + - ``REAL(c_double), DIMENSION(:,:), POINTER`` + - Per-atom array + * - ``lmp%style%local`` + - ``lmp%type%scalar`` + - + - + - + - (not allowed) + * - ``lmp%style%local`` + - ``lmp%type%vector`` + - Ignored + - Ignored + - ``REAL(c_double), DIMENSION(:), POINTER`` + - Local vector + * - ``lmp%style%local`` + - ``lmp%type%array`` + - Ignored + - Ignored + - ``REAL(c_double), DIMENSION(:,:), POINTER`` + - Local array + + In the case of global data, this function returns a value of type + ``real(c_double)``. For per-atom or local data, this function does not + return a value but instead associates the pointer on the left side of the + assignment to point to internal LAMMPS data. Pointers must be of the correct + data type to point to said data (i.e., ``REAL(c_double)``) and have + compatible rank. The pointer being associated with LAMMPS data is type-, + kind-, and rank-checked at run-time via an overloaded assignment operator. + + For example, + + .. code-block:: Fortran + + TYPE(lammps) :: lmp + REAL(c_double) :: dr, dx, dy, dz + ! more code to set up, etc. + CALL lmp%command('fix george all recenter 2 2 2') + ! 
more code + dr = lmp%extract_fix("george", lmp%style%global, lmp%type%scalar) + dx = lmp%extract_fix("george", lmp%style%global, lmp%type%vector, 1) + dy = lmp%extract_fix("george", lmp%style%global, lmp%type%vector, 2) + dz = lmp%extract_fix("george", lmp%style%global, lmp%type%vector, 3) + + will extract the global scalar calculated by + :doc:`fix recenter ` into the variable *dr* and the + three elements of the global vector calculated by fix recenter into the + variables *dx*, *dy*, and *dz*, respectively. + + If asked for per-atom or local data, :f:func:`extract_fix` returns a + pointer to actual LAMMPS data. The pointer so returned will have the + appropriate size to match the internal data, and will be + type/kind/rank-checked at the time of the assignment. For example, + + .. code-block:: Fortran + + TYPE(lammps) :: lmp + REAL(c_double), DIMENSION(:,:), POINTER :: r + ! more code to set up, etc. + CALL lmp%command('fix state all store/state 0 x y z') + ! more code + r = lmp%extract_fix('state', lmp%style%atom, lmp%type%array) + + will bind the pointer *r* to internal LAMMPS data representing the per-atom + array computed by :doc:`fix store/state ` when three + inputs are specified. Similarly, + + .. code-block:: Fortran + + TYPE(lammps) :: lmp + REAL(c_double), DIMENSION(:), POINTER :: x + ! more code to set up, etc. + CALL lmp%command('fix state all store/state 0 x') + ! more code + x = lmp%extract_fix('state', lmp%style%atom, lmp%type%vector) + + will associate the pointer *x* with internal LAMMPS data corresponding to + the per-atom vector computed by :doc:`fix store/state ` + when only one input is specified. Similar examples with ``lmp%style%atom`` + replaced by ``lmp%style%local`` will extract local data from fixes that + define local vectors and/or arrays. + + .. 
warning:: + + The pointers returned by this function for per-atom or local data are + generally not persistent, since the computed data may be redistributed, + reallocated, and reordered at every invocation of the fix. It is thus + advisable to re-invoke this function before the data are accessed or to + make a copy if the data are to be used after other LAMMPS commands have + been issued. + + .. note:: + + LAMMPS cannot easily check if it is valid to access the data, so it + may fail with an error. The caller has to avoid such an error. + + :p character(len=\*) id: string with the name of the fix from which + to extract data + :p integer(c_int) style: value indicating the style of data to extract + (global, per-atom, or local) + :p integer(c_int) type: value indicating the type of data to extract + (scalar, vector, or array) + :p integer(c_int) nrow: row index (used only for global vectors and arrays) + :p integer(c_int) ncol: column index (only used for global arrays) + :r polymorphic: LAMMPS data (for global data) or a pointer to LAMMPS data + (for per-atom or local data). The left-hand side of the assignment should + be of type ``REAL(c_double)`` and have appropriate rank (i.e., + ``DIMENSION(:)`` if expecting per-atom or local vector data and + ``DIMENSION(:,:)`` if expecting per-atom or local array data). If expecting + local or per-atom data, it should have the ``POINTER`` attribute, but + if expecting global data, it should be an ordinary (non-``POINTER``) + variable. + + .. admonition:: Array index order + + Two-dimensional global, per-atom, or local array data from + :f:func:`extract_fix` will be **transposed** from equivalent arrays in + C (or in the ordinary LAMMPS interface accessed through thermodynamic + output), and they will be indexed from 1, not 0. 
This is true even for + global data, which are returned as scalars---this is done primarily so + the interface is consistent, as there is no choice but to transpose the + indices for per-atom or local array data. See the similar note under + :f:func:`extract_atom` for further details. + +-------- + +.. f:function:: extract_variable(name[,group]) + + This function calls :c:func:`lammps_extract_variable` and returns a scalar, + vector, or string containing the value of the variable identified by + *name*. When the variable is an *equal*-style variable (or one compatible + with that style such as *internal*), the variable is evaluated and the + corresponding value returned. When the variable is an *atom*-style variable, + the variable is evaluated and a vector of values is returned. With all + other variables, a string is returned. The *group* argument is only used + for *atom*-style variables and is ignored otherwise. If *group* is absent + for *atom*-style variables, the group is assumed to be "all". + + .. versionadded:: TBD + + This function returns the values of the variables, not pointers to them. + Vectors receiving the values of *atom*-style variables should be of type + ``REAL(c_double)``, be of rank 1 (i.e., ``DIMENSION(:)``), and have the + ``ALLOCATABLE`` attribute. + + .. note:: + + Unlike the C library interface, the Fortran interface does not require + you to deallocate memory when you are through; this is done for you, + behind the scenes. + + For example, + + .. code-block:: Fortran + + TYPE(lammps) :: lmp + REAL(c_double) :: area + ! more code to set up, etc. + CALL lmp%command('variable A equal lx*ly') + ! more code + area = lmp%extract_variable("A") + + will extract the *x*\ --*y* cross-sectional area of the simulation into the + variable *area*. 
+ + :p character(len=\*) name: variable name to evaluate + :o character(len=\*) group [optional]: group for which to extract per-atom + data (if absent, use "all") + :r polymorphic: scalar of type ``REAL(c_double)`` (for *equal*-style + variables and others that are *equal*-compatible), vector of type + ``REAL(c_double), DIMENSION(:), ALLOCATABLE`` for *atom*- or *vector*-style + variables, or ``CHARACTER(LEN=*)`` for *string*-style and compatible + variables. Strings whose length is too short to hold the result will be + truncated. Allocatable strings must be allocated before this function is + called; see note at :f:func:`extract_global` regarding allocatable strings. + Allocatable arrays (for *atom*- and *vector*-style data) will be + reallocated on assignment. + +.. note:: + + LAMMPS cannot easily check if it is valid to access the data + referenced by the variables (e.g., computes, fixes, or thermodynamic + info), so it may fail with an error. The caller has to make certain + that the data are extracted only when it is safe to evaluate the variable + and thus an error and crash are avoided. + +-------- + +.. f:subroutine:: gather_atoms(name, count, data) + + This function calls :c:func:`lammps_gather_atoms` to gather the named + atom-based entity for all atoms on all processors and return it in the + vector *data*. The vector *data* will be ordered by atom + ID, which requires consecutive atom IDs (1 to *natoms*). + + .. versionadded:: TBD + + If you need a similar array but have non-consecutive atom IDs, see + :f:func:`gather_atoms_concat`; for a similar array but for a subset + of atoms, see :f:func:`gather_atoms_subset`. 
+ + The *data* array will be ordered in groups of *count* values, sorted by atom + ID (e.g., if *name* is *x* and *count* = 3, then *data* = x[1][1], x[2][1], + x[3][1], x[1][2], x[2][2], x[3][2], x[1][3], :math:`\dots`); + *data* must be ``ALLOCATABLE`` and will be allocated to length + (*count* :math:`\times` *natoms*), as queried by + :f:func:`extract_setting`. + + :p character(len=\*) name: desired quantity (e.g., *x* or *mask*) + :p integer(c_int) count: number of per-atom values you expect per atom + (e.g., 1 for *type*, *mask*, or *charge*; 3 for *x*, *v*, or *f*). Use + *count* = 3 with *image* if you want a single image flag unpacked into + *x*/*y*/*z* components. + :p real(c_double) data [dimension(:),allocatable]: array into which to store + the data. Array *must* have the ``ALLOCATABLE`` attribute and be of rank 1 + (i.e., ``DIMENSION(:)``). If this array is already allocated, it will be + reallocated to fit the length of the incoming data. + + .. note:: + + If you want data from this function to be accessible as a two-dimensional + array, you can declare a rank-2 pointer and reassign it, like so: + + .. code-block:: Fortran + + USE, INTRINSIC :: ISO_C_BINDING + USE LIBLAMMPS + TYPE(lammps) :: lmp + REAL(c_double), DIMENSION(:), ALLOCATABLE, TARGET :: xdata + REAL(c_double), DIMENSION(:,:), POINTER :: x + ! other code to set up, etc. + CALL lmp%gather_atoms('x',3,xdata) + x(1:3,1:size(xdata)/3) => xdata + + You can then access the *y*\ -component of atom 3 with ``x(2,3)``. + See the note about array index order at :f:func:`extract_atom`. + +-------- + +.. f:subroutine:: gather_atoms_concat(name, count, data) + + This function calls :c:func:`lammps_gather_atoms_concat` to gather the named + atom-based entity for all atoms on all processors and return it in the + vector *data*. + + .. versionadded:: TBD + + The vector *data* will not be ordered by atom ID, and there is no + restriction on the IDs being consecutive. 
If you need the IDs, you can do + another :f:func:`gather_atoms_concat` with *name* set to ``id``. + + If you need a similar array but have consecutive atom IDs, see + :f:func:`gather_atoms`; for a similar array but for a subset of atoms, see + :f:func:`gather_atoms_subset`. + + :p character(len=\*) name: desired quantity (e.g., *x* or *mask*) + :p integer(c_int) count: number of per-atom values you expect per atom + (e.g., 1 for *type*, *mask*, or *charge*; 3 for *x*, *v*, or *f*). Use + *count* = 3 with *image* if you want a single image flag unpacked into + *x*/*y*/*z* components. + :p real(c_double) data [dimension(:),allocatable]: array into which to store + the data. Array *must* have the ``ALLOCATABLE`` attribute and be of rank 1 + (i.e., ``DIMENSION(:)``). If this array is already allocated, it will be + reallocated to fit the length of the incoming data. + +-------- + +.. f:subroutine:: gather_atoms_subset(name, count, ids, data) + + This function calls :c:func:`lammps_gather_atoms_subset` to gather the named + atom-based entity for the atoms in the array *ids* from all processors and + return it in the vector *data*. + + .. versionadded:: TBD + + This subroutine gathers data for the requested atom IDs and stores them in a + one-dimensional array allocated by the user. The data will be ordered by + atom ID, but there is no requirement that the IDs be consecutive. If you + wish to return a similar array for *all* the atoms, use + :f:func:`gather_atoms` or :f:func:`gather_atoms_concat`. + + The *data* array will be in groups of *count* values, sorted by atom ID + in the same order as the array *ids* (e.g., if *name* is *x*, *count* = 3, + and *ids* is [100, 57, 210], then *data* might look like + [x(1,100), x(2,100), x(3,100), x(1,57), x(2,57), x(3,57), x(1,210), + :math:`\dots`]); *ids* must be provided by the user, and *data* must be + of rank 1 (i.e., ``DIMENSION(:)``) and have the ``ALLOCATABLE`` attribute. 
+ + :p character(len=\*) name: desired quantity (e.g., *x* or *mask*) + :p integer(c_int) count: number of per-atom values you expect per atom + (e.g., 1 for *type*, *mask*, or *charge*; 3 for *x*, *v*, or *f*). Use + *count* = 3 with *image* if you want a single image flag unpacked into + *x*/*y*/*z* components. + :p integer(c_int) ids [dimension(:)]: atom IDs corresponding to the atoms + to be gathered + :p real(c_double) data [dimension(:),allocatable]: array into which to store + the data. Array *must* have the ``ALLOCATABLE`` attribute and be of rank 1 + (i.e., ``DIMENSION(:)``). If this array is already allocated, it will be + reallocated to fit the length of the incoming data. + +-------- + +.. f:subroutine:: scatter_atoms(name, data) + + This function calls :c:func:`lammps_scatter_atoms` to scatter the named + atom-based entities in *data* to all processors. + + .. versionadded:: TBD + + This subroutine takes data stored in a one-dimensional array supplied by the + user and scatters them to all atoms on all processors. The data must be + ordered by atom ID, with the requirement that the IDs be consecutive. + Use :f:func:`scatter_atoms_subset` to scatter data for some (or all) + atoms, in any order. + + The *data* array needs to be ordered in groups of *count* values, sorted by + atom ID (e.g., if *name* is *x* and *count* = 3, then + *data* = [x(1,1) x(2,1) x(3,1) x(1,2) x(2,2) x(3,2) x(1,3) :math:`\dots`]); + *data* must be of length (*count* :math:`\times` *natoms*). + + :p character(len=\*) name: quantity to be scattered (e.g., *x* or *charge*) + :p polymorphic data [dimension(:)]: per-atom values packed in a one-dimensional array + containing the data to be scattered. This array must have length *natoms* + (e.g., for *type* or *charge*) or length *natoms*\ :math:`\times 3` + (e.g., for *x* or *f*). 
The array *data* must be rank 1 (i.e., + ``DIMENSION(:)``) and be of type ``INTEGER(c_int)`` (e.g., for *mask* or + *type*) or of type ``REAL(c_double)`` (e.g., for *x* or *charge* or *f*). + +-------- + +.. f:subroutine:: scatter_atoms_subset(name, ids, data) + + This function calls :c:func:`lammps_scatter_atoms_subset` to scatter the + named atom-based entities in *data* to all processors. + + .. versionadded:: TBD + + This subroutine takes data stored in a one-dimensional array supplied by the + user and scatters them to a subset of atoms on all processors. The array + *data* contains data associated with atom IDs, but there is no requirement + that the IDs be consecutive, as they are provided in a separate array, + *ids*. Use :f:func:`scatter_atoms` to scatter data for all atoms, in order. + + The *data* array needs to be organized in groups of 1 or 3 values, + depending on which quantity is being scattered, with the groups in the same + order as the array *ids*. For example, if you want *data* to be the array + [x(1,1) x(2,1) x(3,1) x(1,100) x(2,100) x(3,100) x(1,57) x(2,57) x(3,57)], + then *ids* would be [1 100 57] and *name* would be *x*. + + :p character(len=\*) name: quantity to be scattered (e.g., *x* or *charge*) + :p integer(c_int) ids [dimension(:)]: atom IDs corresponding to the atoms + being scattered + :p polymorphic data [dimension(:)]: per-atom values packed into a + one-dimensional array containing the data to be scattered. This array must + have either the same length as *ids* (for *mask*, *type*, etc.) or three + times its length (for *x*, *f*, etc.); the array must be rank 1 + and be of type ``INTEGER(c_int)`` (e.g., for *mask* or *type*) or of type + ``REAL(c_double)`` (e.g., *charge*, *x*, or *f*). + +-------- + +.. f:function:: version() + + This method returns the numeric LAMMPS version like + :cpp:func:`lammps_version` does. + + :r integer: LAMMPS version + +-------- + +.. 
f:subroutine:: get_os_info(buffer) + + This function can be used to retrieve detailed information about the hosting + operating system and compiler/runtime environment. + + .. versionadded:: TBD + + A suitable buffer has to be provided. The assembled text will be truncated + so as not to overflow this buffer. The string is typically a few hundred + bytes long. + +-------- + +.. f:function:: config_has_mpi_support() + + This function is used to query whether LAMMPS was compiled with a real MPI + library or in serial. + + .. versionadded:: TBD + + :r logical: ``.FALSE.`` when compiled with STUBS, ``.TRUE.`` if compiled + with MPI. + +-------- + +.. f:function:: config_has_gzip_support() + + Check if the LAMMPS library supports reading or writing compressed + files via a pipe to gzip or similar compression programs. + + .. versionadded:: TBD + + Several LAMMPS commands (e.g., :doc:`read_data`, :doc:`write_data`, + :doc:`dump styles atom, custom, and xyz `) support reading and writing + compressed files via creating a pipe to the ``gzip`` program. This function + checks whether this feature was :ref:`enabled at compile time `. + It does **not** check whether ``gzip`` or any other supported compression + programs themselves are installed and usable. + + :r logical: + +-------- + +.. f:function:: config_has_png_support() + + Check if the LAMMPS library supports writing PNG format images. + + .. versionadded:: TBD + + The LAMMPS :doc:`dump style image ` supports writing multiple + image file formats. Most of them, however, need support from an external + library, and using that has to be :ref:`enabled at compile time `. + This function checks whether support for the `PNG image file format + `_ is available + in the current LAMMPS library. + + :r logical: + +-------- + +.. f:function:: config_has_jpeg_support() + + Check if the LAMMPS library supports writing JPEG format images. + + .. 
versionadded:: TBD + + The LAMMPS :doc:`dump style image ` supports writing multiple + image file formats. Most of them, however, need support from an external + library, and using that has to be :ref:`enabled at compile time `. + This function checks whether support for the `JPEG image file format + `_ is available in the current LAMMPS library. + + :r logical: + +-------- + +.. f:function:: config_has_ffmpeg_support() + + Check if the LAMMPS library supports creating movie files via a pipe to + ffmpeg. + + .. versionadded:: TBD + + The LAMMPS :doc:`dump style movie ` supports generating movies + from images on-the-fly via creating a pipe to the + `ffmpeg `_ program. + This function checks whether this feature was + :ref:`enabled at compile time `. + It does **not** check whether the ``ffmpeg`` program itself is installed and usable. + + :r logical: + +-------- + +.. f:function:: config_has_exceptions() + + Check whether LAMMPS errors will throw C++ exceptions. + + .. versionadded:: TBD + + In case of an error, LAMMPS will either abort or throw a C++ exception. + The latter has to be :ref:`enabled at compile time `. + This function checks if exceptions were enabled. + + When using the library interface with C++ exceptions enabled, the library + interface functions will "catch" them, and the error status can then be + checked by calling :f:func:`has_error`. The most recent error message can be + retrieved via :f:func:`get_last_error_message`. + This can allow one to restart a calculation or delete and recreate + the LAMMPS instance when a C++ exception occurs. One application + of using exceptions this way is the :ref:`lammps_shell`. If C++ + exceptions are disabled and an error happens during a call to + LAMMPS or the Fortran API, the application will terminate. + + :r logical: + +-------- + +.. f:function:: config_has_package(name) + + Check whether a specific package has been included in LAMMPS. + + .. 
versionadded:: TBD + + This function checks whether the LAMMPS library in use includes the specific + :doc:`LAMMPS package ` provided as an argument. + + :r logical: + +-------- + +.. f:function:: config_package_count() + + Count the number of installed packages in the LAMMPS library. + + .. versionadded:: TBD + + This function counts how many :doc:`LAMMPS packages ` are + included in the LAMMPS library in use. It directly calls the C library + function :cpp:func:`lammps_config_package_count`. + + :r integer(c_int): number of packages installed + +-------- + +.. f:subroutine:: config_package_name(idx, buffer) + + Get the name of a package in the list of installed packages in the LAMMPS + library. + + .. versionadded:: TBD + + This subroutine copies the name of the package with the index *idx* into the + provided string *buffer*. If the name of the package exceeds the length of + the buffer, it will be truncated accordingly. If the index is out of range, + *buffer* is set to an empty string. + + :p integer(c_int) idx: index of the package in the list of included packages + :math:`(0 \le idx < \text{package count})` + :p character(len=\*) buffer: string to hold the name of the package + +-------- + +.. f:subroutine:: installed_packages(package[, length]) + + Obtain a list of the names of enabled packages in the LAMMPS shared library + and store it in *package*. + + This function is analogous to the :py:func:`installed_packages` function in + the Python API. The optional argument *length* sets the length of each + string in the vector *package* (default: 31). + + :p character(len=:) package [dimension(:),allocatable]: list of packages; + *must* have the ``ALLOCATABLE`` attribute and be of rank 1 + (``DIMENSION(:)``) with allocatable length. + :o integer length [optional]: length of each string in the list. + Default: 31. + +-------- + +.. 
f:subroutine:: flush_buffers() + + This function calls :cpp:func:`lammps_flush_buffers`, which flushes buffered + output to be written to screen and logfile. This can simplify capturing + output from LAMMPS library calls. + + .. versionadded:: TBD + +-------- + +.. f:function:: is_running() + + Check if LAMMPS is currently inside a run or minimization. + + .. versionadded:: TBD + + This function can be used from signal handlers or multi-threaded + applications to determine if the LAMMPS instance is currently active. + + :r logical: ``.FALSE.`` if idle or ``.TRUE.`` if active + +-------- + +.. f:subroutine:: force_timeout() + + Force a timeout to stop an ongoing run cleanly. + + .. versionadded:: TBD + + This function can be used from signal handlers or multi-threaded + applications to cleanly terminate an ongoing run. + +-------- + +.. f:function:: has_error() + + Check if there is a (new) error message available. + + .. versionadded:: TBD + + This function can be used to query if an error inside of LAMMPS + has thrown a :ref:`C++ exception `. + + .. note:: + + This function will always report "no error" when the LAMMPS library + has been compiled without ``-DLAMMPS_EXCEPTIONS``, which turns fatal + errors aborting LAMMPS into C++ exceptions. You can use the library + function :cpp:func:`lammps_config_has_exceptions` to check if this is + the case. + + :r logical: ``.TRUE.`` if there is an error. + +-------- + +.. f:subroutine:: get_last_error_message(buffer[,status]) + + Copy the last error message into the provided buffer. + + .. versionadded:: TBD + + This function can be used to retrieve the error message that was set + in the event of an error inside of LAMMPS that resulted in a + :ref:`C++ exception `. A suitable buffer for a string has + to be provided. If the internally-stored error message is longer than the + string and the string does not have ``ALLOCATABLE`` length, it will be + truncated accordingly. 
The optional argument *status* indicates the + kind of error: a "1" indicates an error that occurred on all MPI ranks and + is often recoverable, while a "2" indicates an abort that would happen only + in a single MPI rank and thus may not be recoverable, as other MPI ranks may + be waiting on the failing MPI rank(s) to send messages. + + .. note:: + + This function will do nothing when the LAMMPS library has been + compiled without ``-DLAMMPS_EXCEPTIONS``, which turns errors aborting + LAMMPS into C++ exceptions. You can use the function + :f:func:`config_has_exceptions` to check whether this is the case. + + :p character(len=\*) buffer: string buffer to copy the error message into + :o integer(c_int) status [optional]: 1 when all ranks had the error, + 2 on a single-rank error. diff --git a/doc/src/Library_objects.rst b/doc/src/Library_objects.rst index eed14b3a05..8ebecfcc94 100644 --- a/doc/src/Library_objects.rst +++ b/doc/src/Library_objects.rst @@ -6,6 +6,7 @@ fixes, or variables in LAMMPS using the following functions: - :cpp:func:`lammps_extract_compute` - :cpp:func:`lammps_extract_fix` +- :cpp:func:`lammps_extract_variable_datatype` - :cpp:func:`lammps_extract_variable` - :cpp:func:`lammps_set_variable` @@ -21,6 +22,11 @@ fixes, or variables in LAMMPS using the following functions: ----------------------- +.. doxygenfunction:: lammps_extract_variable_datatype + :project: progguide + +----------------------- + .. doxygenfunction:: lammps_extract_variable :project: progguide @@ -36,3 +42,5 @@ fixes, or variables in LAMMPS using the following functions: .. doxygenenum:: _LMP_STYLE_CONST .. doxygenenum:: _LMP_TYPE_CONST + +.. 
doxygenenum:: _LMP_VAR_CONST diff --git a/doc/src/Library_properties.rst b/doc/src/Library_properties.rst index a5c9c79c64..dfd72adc95 100644 --- a/doc/src/Library_properties.rst +++ b/doc/src/Library_properties.rst @@ -16,8 +16,8 @@ This section documents the following functions: -------------------- The library interface allows the extraction of different kinds of -information about the active simulation instance and also - in some -cases - to apply modifications to it. This enables combining of a +information about the active simulation instance and also---in some +cases---to apply modifications to it. This enables combining of a LAMMPS simulation with other processing and simulation methods computed by the calling code, or by another code that is coupled to LAMMPS via the library interface. In some cases the data returned is direct @@ -25,9 +25,9 @@ reference to the original data inside LAMMPS, cast to a void pointer. In that case the data needs to be cast to a suitable pointer for the calling program to access it, and you may need to know the correct dimensions and lengths. This also means you can directly change those -value(s) from the calling program, e.g. to modify atom positions. Of -course, this should be done with care. When accessing per-atom data, -please note that this data is the per-processor **local** data and is +value(s) from the calling program (e.g., to modify atom positions). Of +course, changing values should be done with care. When accessing per-atom +data, please note that these data are the per-processor **local** data and are indexed accordingly. Per-atom data can change sizes and ordering at every neighbor list rebuild or atom sort event as atoms migrate between sub-domains and processors. diff --git a/doc/src/Run_basics.rst b/doc/src/Run_basics.rst index 5f1211d093..d2810f5986 100644 --- a/doc/src/Run_basics.rst +++ b/doc/src/Run_basics.rst @@ -30,12 +30,13 @@ executable itself can be placed elsewhere. .. 
note:: - The redirection operator "<" will not always work when running - in parallel with mpirun or mpiexec; for those systems the -in form is required. + The redirection operator "<" will not always work when running in + parallel with ``mpirun`` or ``mpiexec``; for those systems the -in + form is required. As LAMMPS runs it prints info to the screen and a logfile named -*log.lammps*\ . More info about output is given on the -:doc:`screen and logfile output ` page. +*log.lammps*\ . More info about output is given on the :doc:`screen and +logfile output ` page. If LAMMPS encounters errors in the input script or while running a simulation it will print an ERROR message and stop or a WARNING diff --git a/doc/src/Run_options.rst b/doc/src/Run_options.rst index f3c7973197..f7bb652ea9 100644 --- a/doc/src/Run_options.rst +++ b/doc/src/Run_options.rst @@ -93,13 +93,13 @@ switch is not set (the default), LAMMPS will operate as if the KOKKOS package were not installed; i.e. you can run standard LAMMPS or with the GPU or OPENMP packages, for testing or benchmarking purposes. -Additional optional keyword/value pairs can be specified which -determine how Kokkos will use the underlying hardware on your -platform. These settings apply to each MPI task you launch via the -"mpirun" or "mpiexec" command. You may choose to run one or more MPI -tasks per physical node. Note that if you are running on a desktop -machine, you typically have one physical node. On a cluster or -supercomputer there may be dozens or 1000s of physical nodes. +Additional optional keyword/value pairs can be specified which determine +how Kokkos will use the underlying hardware on your platform. These +settings apply to each MPI task you launch via the ``mpirun`` or +``mpiexec`` command. You may choose to run one or more MPI tasks per +physical node. Note that if you are running on a desktop machine, you +typically have one physical node. 
On a cluster or supercomputer there +may be dozens or 1000s of physical nodes. Either the full word or an abbreviation can be used for the keywords. Note that the keywords do not use a leading minus sign. I.e. the @@ -148,9 +148,9 @@ one of these 4 environment variables MV2_COMM_WORLD_LOCAL_RANK (Mvapich) OMPI_COMM_WORLD_LOCAL_RANK (OpenMPI) -which are initialized by the "srun", "mpirun" or "mpiexec" commands. -The environment variable setting for each MPI rank is used to assign a -unique GPU ID to the MPI task. +which are initialized by the ``srun``, ``mpirun``, or ``mpiexec`` +commands. The environment variable setting for each MPI rank is used to +assign a unique GPU ID to the MPI task. .. parsed-literal:: diff --git a/doc/src/Speed_gpu.rst b/doc/src/Speed_gpu.rst index 883bc9c7e3..e95787ebee 100644 --- a/doc/src/Speed_gpu.rst +++ b/doc/src/Speed_gpu.rst @@ -76,10 +76,11 @@ instructions. **Run with the GPU package from the command line:** -The mpirun or mpiexec command sets the total number of MPI tasks used -by LAMMPS (one or multiple per compute node) and the number of MPI -tasks used per node. E.g. the mpirun command in MPICH does this via -its -np and -ppn switches. Ditto for OpenMPI via -np and -npernode. +The ``mpirun`` or ``mpiexec`` command sets the total number of MPI tasks +used by LAMMPS (one or multiple per compute node) and the number of MPI +tasks used per node. E.g. the ``mpirun`` command in MPICH does this via +its ``-np`` and ``-ppn`` switches. Ditto for OpenMPI via ``-np`` and +``-npernode``. When using the GPU package, you cannot assign more than one GPU to a single MPI task. However multiple MPI tasks can share the same GPU, @@ -129,8 +130,8 @@ GPU package pair styles. **Or run with the GPU package by editing an input script:** -The discussion above for the mpirun/mpiexec command, MPI tasks/node, -and use of multiple MPI tasks/GPU is the same. 
+The discussion above for the ``mpirun`` or ``mpiexec`` command, MPI +tasks/node, and use of multiple MPI tasks/GPU is the same. Use the :doc:`suffix gpu ` command, or you can explicitly add an "gpu" suffix to individual styles in your input script, e.g. diff --git a/doc/src/Speed_kokkos.rst b/doc/src/Speed_kokkos.rst index 8b9b2e99af..73345b7e88 100644 --- a/doc/src/Speed_kokkos.rst +++ b/doc/src/Speed_kokkos.rst @@ -72,12 +72,12 @@ See the :ref:`Build extras ` page for instructions. Running LAMMPS with the KOKKOS package """""""""""""""""""""""""""""""""""""" -All Kokkos operations occur within the context of an individual MPI -task running on a single node of the machine. The total number of MPI -tasks used by LAMMPS (one or multiple per compute node) is set in the -usual manner via the mpirun or mpiexec commands, and is independent of -Kokkos. E.g. the mpirun command in OpenMPI does this via its -np and --npernode switches. Ditto for MPICH via -np and -ppn. +All Kokkos operations occur within the context of an individual MPI task +running on a single node of the machine. The total number of MPI tasks +used by LAMMPS (one or multiple per compute node) is set in the usual +manner via the ``mpirun`` or ``mpiexec`` commands, and is independent of +Kokkos. E.g. the mpirun command in OpenMPI does this via its ``-np`` and +``-npernode`` switches. Ditto for MPICH via ``-np`` and ``-ppn``. Running on a multi-core CPU ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -168,7 +168,7 @@ for your MPI installation), binding can be forced with these flags: .. parsed-literal:: - OpenMPI 1.8: mpirun -np 2 --bind-to socket --map-by socket ./lmp_openmpi ... + OpenMPI 1.8: mpirun -np 2 --bind-to socket --map-by socket ./lmp_openmpi ... Mvapich2 2.0: mpiexec -np 2 --bind-to socket --map-by socket ./lmp_mvapich ... 
For binding threads with KOKKOS OpenMP, use thread affinity environment @@ -310,7 +310,8 @@ Alternatively the effect of the "-sf" or "-pk" switches can be duplicated by adding the :doc:`package kokkos ` or :doc:`suffix kk ` commands to your input script. The discussion above for building LAMMPS with the KOKKOS package, the -mpirun/mpiexec command, and setting appropriate thread are the same. +``mpirun`` or ``mpiexec`` command, and setting appropriate thread +properties are the same. You must still use the "-k on" :doc:`command-line switch ` to enable the KOKKOS package, and specify its additional arguments for diff --git a/doc/src/Speed_omp.rst b/doc/src/Speed_omp.rst index 29c55df62f..7f8913d20f 100644 --- a/doc/src/Speed_omp.rst +++ b/doc/src/Speed_omp.rst @@ -33,8 +33,8 @@ These examples assume one or more 16-core nodes. mpirun -np 4 lmp_omp -sf omp -pk omp 4 -in in.script # 4 MPI tasks, 4 threads/task mpirun -np 32 -ppn 4 lmp_omp -sf omp -pk omp 4 -in in.script # 8 nodes, 4 MPI tasks/node, 4 threads/task -The mpirun or mpiexec command sets the total number of MPI tasks used -by LAMMPS (one or multiple per compute node) and the number of MPI +The ``mpirun`` or ``mpiexec`` command sets the total number of MPI tasks +used by LAMMPS (one or multiple per compute node) and the number of MPI tasks used per node. E.g. the mpirun command in MPICH does this via its -np and -ppn switches. Ditto for OpenMPI via -np and -npernode. @@ -58,8 +58,8 @@ OMP_NUM_THREADS environment variable. Or run with the OPENMP package by editing an input script """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" -The discussion above for the mpirun/mpiexec command, MPI tasks/node, -and threads/MPI task is the same. +The discussion above for the ``mpirun`` or ``mpiexec`` command, MPI +tasks/node, and threads/MPI task is the same. Use the :doc:`suffix omp ` command, or you can explicitly add an "omp" suffix to individual styles in your input script, e.g. 
diff --git a/doc/src/atom_style.rst b/doc/src/atom_style.rst index b753fdbe5f..c3b16c6ad1 100644 --- a/doc/src/atom_style.rst +++ b/doc/src/atom_style.rst @@ -91,7 +91,7 @@ quantities. +--------------+-----------------------------------------------------+--------------------------------------+ | *charge* | charge | atomic system with charges | +--------------+-----------------------------------------------------+--------------------------------------+ -| *dielectric* | dipole, area, curvature | system with surface polarization | +| *dielectric* | normx normy normz area/patch ed em epsilon curv | system with surface polarization | +--------------+-----------------------------------------------------+--------------------------------------+ | *dipole* | charge and dipole moment | system with dipolar particles | +--------------+-----------------------------------------------------+--------------------------------------+ @@ -180,16 +180,21 @@ vector with the 3 diameters of the ellipsoid and a quaternion 4-vector with its orientation. For the *dielectric* style, each particle can be either a physical -particle (e.g. an ion), or an interface particle representing a -boundary element. For physical particles, the per-particle properties -are the same as atom_style full. For interface particles, in addition -to these properties, each particle also has an area, a normal unit -vector, a mean local curvature, the mean and difference of the -dielectric constants of two sides of the interface, and the local -dielectric constant at the boundary element. The distinction between -the physical and interface particles is only meaningful when :doc:`fix -polarize ` commands are applied to the interface -particles. +particle (e.g. an ion), or an interface particle representing a boundary +element between two regions of different dielectric constant. 
For +interface particles, in addition to the properties associated with +atom_style full, each particle also should be assigned a normal unit +vector (defined by normx, normy, normz), an area (area/patch), the +difference and mean of the dielectric constants of two sides of the +interface along the direction of the normal vector (ed and em), the +local dielectric constant at the boundary element (epsilon), and a mean +local curvature (curv). Physical particles must be assigned these +values, as well, but only their local dielectric constants will be used; +see documentation for associated :doc:`pair styles ` +and :doc:`fixes `. The distinction between the physical +and interface particles is only meaningful when :doc:`fix polarize +` commands are applied to the interface particles. This +style is part of the DIELECTRIC package. For the *dipole* style, a point dipole is defined for each point particle. Note that if you wish the particles to be finite-size diff --git a/doc/src/fix_polarize.rst b/doc/src/fix_polarize.rst index 6ed3b36c55..6a997aa7ea 100644 --- a/doc/src/fix_polarize.rst +++ b/doc/src/fix_polarize.rst @@ -16,11 +16,11 @@ Syntax .. parsed-literal:: - fix ID group-ID style nevery tolerance ... + fix ID group-ID style nevery tolerance * ID, group-ID are documented in :doc:`fix ` command * style = *polarize/bem/gmres* or *polarize/bem/icc* or *polarize/functional* -* Nevery = this fixed is invoked every this many timesteps +* nevery = this fix is invoked every this many timesteps * tolerance = the relative tolerance for the iterative solver to stop @@ -46,44 +46,53 @@ Description These fixes compute induced charges at the interface between two impermeable media with different dielectric constants. The interfaces -need to be discretized into vertices, each representing a boundary element. -The vertices are treated as if they were regular atoms or particles.
-:doc:`atom_style dielectric ` should be used since it defines -the additional properties of each interface particle such as -interface normal vectors, element areas, and local dielectric mismatch. -These fixes also require the use of :doc:`pair_style ` and -:doc:`kspace_style ` with the *dielectric* suffix. -At every time step, given a configuration of the physical charges in the system -(such as atoms and charged particles) these fixes compute and update -the charge of the interface particles. The interfaces are allowed to move -during the simulation with appropriate time integrators (for example, -with :doc:`fix_rigid `). +need to be discretized into vertices, each representing a boundary +element. The vertices are treated as if they were regular atoms or +particles. :doc:`atom_style dielectric ` should be used +since it defines the additional properties of each interface particle +such as interface normal vectors, element areas, and local dielectric +mismatch. These fixes also require the use of :doc:`pair_style +` and :doc:`kspace_style ` with the +*dielectric* suffix. At every time step, given a configuration of the +physical charges in the system (such as atoms and charged particles) +these fixes compute and update the charge of the interface +particles. The interfaces are allowed to move during the simulation if +the appropriate time integrators are also set (for example, with +:doc:`fix_rigid `). -Consider an interface between two media: one with dielectric constant -of 78 (water), the other of 4 (silica). The interface is discretized -into 2000 boundary elements, each represented by an interface particle. Suppose that -each interface particle has a normal unit vector pointing from the silica medium to water. -The dielectric difference along the normal vector is then 78 - 4 = 74, -the mean dielectric value is (78 + 4) / 2 = 41. 
Each boundary element -also has its area and the local mean curvature (which is used by these fixes -for computing a correction term in the local electric field). -To model charged interfaces, the interface particle will have a non-zero charge value, +Consider an interface between two media: one with dielectric constant of +78 (water), the other of 4 (silica). The interface is discretized into +2000 boundary elements, each represented by an interface +particle. Suppose that each interface particle has a normal unit vector +pointing from the silica medium to water. The dielectric difference +along the normal vector is then 78 - 4 = 74, the mean dielectric value +is (78 + 4) / 2 = 41. Each boundary element also has its area and the +local mean curvature, which is used by these fixes for computing a +correction term in the local electric field. To model charged +interfaces, the interface particle will have a non-zero charge value, coming from its area and surface charge density. -For non-interface particles such as atoms and charged particles, -the interface normal vectors, element area, and dielectric mismatch are -irrelevant. Their local dielectric value is used to rescale their actual charge -when computing the Coulombic interactions. For instance, for a cation carrying -a charge of +2 (in charge unit) in an implicit solvent with dielectric constant of 40 -would have actual charge of +2, and a local dielectric constant value of 40. -It is assumed that the particles cannot pass through the interface during the simulation -so that its local dielectric constant value does not change. +For non-interface particles such as atoms and charged particles, the +interface normal vectors, element area, and dielectric mismatch are +irrelevant and unused. Their local dielectric value is used internally +to rescale their given charge when computing the Coulombic +interactions. 
For instance, to simulate a cation carrying a charge of +2 +(in simulation charge units) in an implicit solvent with a dielectric +constant of 40, the cation's charge should be set to +2 and its local +dielectric constant property (defined in the :doc:`atom_style dielectric +`) should be set to 40; there is no need to manually rescale +charge. This will produce the proper force for any :doc:`pair_style +` with the dielectric suffix. It is assumed that the +particles cannot pass through the interface during the simulation +because the value of the local dielectric constant property does not +change. -There are some example scripts for using these fixes -with LAMMPS in the ``examples/PACKAGES/dielectric`` directory. The README file -therein contains specific details on the system setup. Note that the example data files -show the additional fields (columns) needed for :doc:`atom_style dielectric ` -beyond the conventional fields *id*, *mol*, *type*, *q*, *x*, *y*, and *z*. +There are some example scripts for using these fixes with LAMMPS in the +``examples/PACKAGES/dielectric`` directory. The README file therein +contains specific details on the system setup. Note that the example +data files show the additional fields (columns) needed for +:doc:`atom_style dielectric ` beyond the conventional fields +*id*, *mol*, *type*, *q*, *x*, *y*, and *z*. ---------- @@ -104,22 +113,24 @@ the interface, are computed using the equation: * :math:`\mathbf{E}(\mathbf{s})` is the electrical field at the vertex * :math:`\mathbf{n}(\mathbf{s})` is the unit normal vector at the vertex pointing from medium with :math:`\epsilon_2` to that with :math:`\epsilon_1` -Fix *polarize/bem/gmres* employs the Generalized Minimum Residual (GMRES) -as described in :ref:`(Barros) ` to solve :math:`\sigma_b`. +Fix *polarize/bem/gmres* employs the Generalized Minimum Residual +(GMRES) as described in :ref:`(Barros) ` to solve +:math:`\sigma_b`. 
Fix *polarize/bem/icc* employs the successive over-relaxation algorithm as described in :ref:`(Tyagi) ` to solve :math:`\sigma_b`. -The iterative solvers would terminate either when the maximum relative change -in the induced charges in consecutive iterations is below the set tolerance, -or when the number of iterations reaches *iter_max* (see below). +The iterative solvers would terminate either when the maximum relative +change in the induced charges in consecutive iterations is below the set +tolerance, or when the number of iterations reaches *iter_max* (see +below). -Fix *polarize/functional* employs the energy functional variation approach -as described in :ref:`(Jadhao) ` to solve :math:`\sigma_b`. +Fix *polarize/functional* employs the energy functional variation +approach as described in :ref:`(Jadhao) ` to solve +:math:`\sigma_b`. - -More details on the implementation of these fixes and their recommended use -are described in :ref:`(NguyenTD) `. +More details on the implementation of these fixes and their recommended +use are described in :ref:`(NguyenTD) `. Restart, fix_modify, output, run start/stop, minimize info @@ -127,35 +138,78 @@ Restart, fix_modify, output, run start/stop, minimize info No information about this fix is written to :doc:`binary restart files `. -The :doc:`fix_modify ` command provides certain options to -control the induced charge solver and the initial values of the interface elements: +The :doc:`fix_modify ` command provides the ability to modify certain +settings: .. 
parsed-literal:: *itr_max* arg arg = maximum number of iterations for convergence *dielectrics* ediff emean epsilon area charge - ediff = dielectric difference - emean = dielectric mean - epsilon = local dielectric value - aree = element area - charge = real interface charge + ediff = dielectric difference or NULL + emean = dielectric mean or NULL + epsilon = local dielectric value or NULL + area = element area or NULL + charge = real interface charge or NULL + *kspace* arg = yes or no + *rand* max seed + max = range of random induced charges to be generated + seed = random number seed to use when generating random charge + *mr* arg + arg = maximum number of q-vectors to use when solving (GMRES only) + *omega* arg + arg = relaxation parameter to use when iterating (ICC only) -*polarize/bem/gmres* or *polarize/bem/icc* compute a global 2-element vector -which can be accessed by various :doc:`output commands `. -The first element is the number of iterations when the solver terminates -(of which the upper bound is set by *iter_max*). The second element is the RMS error. +The *itr_max* keyword sets the max number of iterations to be used for +solving each step. + +The *dielectrics* keyword allows properties of the atoms in group +*group-ID* to be modified. Values passed to any of the arguments +(*ediff*, *emean*, *epsilon*, *area*, *charge*) will override existing +values for all atoms in the group *group-ID*. Passing NULL to any of +these arguments will preserve the existing value. Note that setting the +properties of the interface this way will change the properties of all +atoms associated with the fix (all atoms in *group-ID*), so multiple fix +and fix_modify commands would be needed to change the properties of two +different interfaces to different values (one fix and fix_modify for +each interface group). + +The *kspace* keyword turns on long range interactions. 
+ +If the arguments of the *rand* keyword are set, then the atoms subject +to this fix will be assigned a random initial charge in a uniform +distribution from -*max*/2 to *max*/2, using random number seed *seed*. + +The *mr* keyword only applies to *style* = *polarize/bem/gmres*. It is +the maximum number of q-vectors to use when solving for the surface +charge. + +The *omega* keyword only applies when using *style* = +*polarize/bem/icc*. It is a relaxation parameter defined in +:ref:`(Tyagi) ` that should generally be set between 0 and 2. + +Note that the local dielectric constant (epsilon) can also be set +independently using the :doc:`set ` command. + +---------- + +*polarize/bem/gmres* or *polarize/bem/icc* compute a global 2-element +vector which can be accessed by various :doc:`output commands +`. The first element is the number of iterations when the +solver terminates (of which the upper bound is set by *itr_max*). The +second element is the RMS error. Restrictions """""""""""" -These fixes are part of the DIELECTRIC package. It is only enabled +These fixes are part of the DIELECTRIC package. They are only enabled if LAMMPS was built with that package, which requires that also the KSPACE package is installed. See the :doc:`Build package ` page for more info. -Note that the *polarize/bem/gmres* and *polarize/bem/icc* fixes only support -:doc:`units ` *lj*, *real*, *metal*, *si* and *nano* at the moment. +Note that the *polarize/bem/gmres* and *polarize/bem/icc* fixes only +support :doc:`units ` *lj*, *real*, *metal*, *si* and *nano* at +the moment. Related commands @@ -171,6 +225,15 @@ Default *iter_max* = 20 +*kspace* = yes + +*omega* = 0.7 (ICC only) + +*mr* = \# atoms in group *group-ID* minus 1 (GMRES only) + +No random charge initialization happens by default. + + ---------- ..
_Barros: diff --git a/doc/src/pair_dielectric.rst b/doc/src/pair_dielectric.rst index dbdd163a76..0b60d202ea 100644 --- a/doc/src/pair_dielectric.rst +++ b/doc/src/pair_dielectric.rst @@ -76,16 +76,19 @@ Description """"""""""" All these pair styles are derived from the corresponding pair styles -without the *dielectric*\ suffix. In addition to computing atom forces -and energies, these pair styles compute the electrical field vector -at each atom, which are to be used in the :doc:`fix polarize ` commands. +without the *dielectric* suffix. In addition to computing atom forces +and energies, these pair styles compute the electric field vector at +each atom, which is intended to be used by the :doc:`fix polarize +` commands to compute induced charges at interfaces +between two regions of different dielectric constant. -These pair styles should be used with :doc:`atom_style dielectric `, -which uses atom charges rescaled by their local dielectric constant. +These pair styles should be used with :doc:`atom_style dielectric +`. The styles lj/cut/coul/long/dielectric, lj/cut/coul/msm/dielectric, and -lj/long/coul/long/dielectric should be used with their kspace style counterparts, -namely, pppm/dielectric, pppm/disp/dielectric, and msm/dielectric, respectively. +lj/long/coul/long/dielectric should be used with their kspace style +counterparts, namely, pppm/dielectric, pppm/disp/dielectric, and +msm/dielectric, respectively. ---------- @@ -97,24 +100,27 @@ Mixing, shift, table, tail correction, restart, rRESPA info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" For atom type pairs I,J and I != J, the epsilon and sigma coefficients -and cutoff distances for this pair style can be mixed. The default -mix value is *geometric*\ . See the "pair_modify" command for details. +and cutoff distances for this pair style can be mixed. The default mix +algorithm is *geometric*\ . See the :doc:`pair_modify ` +command for details.
The :doc:`pair_modify ` table option is not relevant for this pair style. -This pair style writes its information to :doc:`binary restart files `, so pair_style and pair_coeff commands do not need -to be specified in an input script that reads a restart file. +These pair styles write their information to :doc:`binary restart files +`, so pair_style and pair_coeff commands do not need to be +specified in an input script that reads a restart file. -This pair style can only be used via the *pair* keyword of the +These pair styles can only be used via the *pair* keyword of the :doc:`run_style respa ` command. It does not support the *inner*, *middle*, *outer* keywords. Restrictions """""""""""" -These styles are part of the DIELECTRIC package. They are only enabled if -LAMMPS was built with that package. See the :doc:`Build package ` page for more info. +These styles are part of the DIELECTRIC package. They are only enabled +if LAMMPS was built with that package. See the :doc:`Build package +` page for more info. Related commands """""""""""""""" diff --git a/doc/src/pair_pace.rst b/doc/src/pair_pace.rst index c9857c5caa..b54e585f69 100644 --- a/doc/src/pair_pace.rst +++ b/doc/src/pair_pace.rst @@ -1,11 +1,15 @@ .. index:: pair_style pace .. index:: pair_style pace/kk +.. index:: pair_style pace/extrapolation pair_style pace command ======================= Accelerator Variants: *pace/kk* +pair_style pace/extrapolation command +===================================== + Syntax """""" @@ -22,6 +26,10 @@ Syntax *recursive* = use recursive algorithm for basis functions *chunksize* value = number of atoms in each pass +..
code-block:: LAMMPS + + pair_style pace/extrapolation + Examples """""""" @@ -31,50 +39,96 @@ Examples pair_style pace product chunksize 2048 pair_coeff * * Cu-PBE-core-rep.ace Cu + pair_style pace/extrapolation + pair_coeff * * Cu.yaml Cu.asi Cu + Description """"""""""" Pair style *pace* computes interactions using the Atomic Cluster Expansion (ACE), which is a general expansion of the atomic energy in -multi-body basis functions. :ref:`(Drautz) `. -The *pace* pair style -provides an efficient implementation that -is described in this paper :ref:`(Lysogorskiy) `. +multi-body basis functions. :ref:`(Drautz) `. The *pace* +pair style provides an efficient implementation that is described in +this paper :ref:`(Lysogorskiy) `. -In ACE, the total energy is decomposed into a sum over -atomic energies. The energy of atom *i* is expressed as a -linear or non-linear function of one or more density functions. -By projecting the -density onto a local atomic base, the lowest order contributions -to the energy can be expressed as a set of scalar polynomials in -basis function contributions summed over neighbor atoms. +In ACE, the total energy is decomposed into a sum over atomic +energies. The energy of atom *i* is expressed as a linear or non-linear +function of one or more density functions. By projecting the density +onto a local atomic base, the lowest order contributions to the energy +can be expressed as a set of scalar polynomials in basis function +contributions summed over neighbor atoms. 
Only a single pair_coeff command is used with the *pace* style which specifies an ACE coefficient file followed by N additional arguments -specifying the mapping of ACE elements to LAMMPS atom types, -where N is the number of LAMMPS atom types: +specifying the mapping of ACE elements to LAMMPS atom types, where N is +the number of LAMMPS atom types: * ACE coefficient file * N element names = mapping of ACE elements to atom types Only a single pair_coeff command is used with the *pace* style which -specifies an ACE file that fully defines the potential. -Note that unlike for other potentials, cutoffs are -not set in the pair_style or pair_coeff command; they are specified in -the ACE file. +specifies an ACE file that fully defines the potential. Note that +unlike for other potentials, cutoffs are not set in the pair_style or +pair_coeff command; they are specified in the ACE file. The pair_style *pace* command may be followed by the optional keyword -*product* or *recursive*, which determines which of two algorithms -is used for the calculation of basis functions and derivatives. -The default is *recursive*. +*product* or *recursive*, which determines which of two algorithms is +used for the calculation of basis functions and derivatives. The +default is *recursive*. -The keyword *chunksize* is only applicable when -using the pair style *pace* with the KOKKOS package on GPUs and is -ignored otherwise. This keyword controls the number of atoms -in each pass used to compute the atomic cluster expansion and is used to -avoid running out of memory. For example if there are 8192 atoms in the -simulation and the *chunksize* is set to 4096, the ACE -calculation will be broken up into two passes (running on a single GPU). +The keyword *chunksize* is only applicable when using the pair style +*pace* with the KOKKOS package on GPUs and is ignored otherwise. 
This +keyword controls the number of atoms in each pass used to compute the +atomic cluster expansion and is used to avoid running out of memory. +For example if there are 8192 atoms in the simulation and the +*chunksize* is set to 4096, the ACE calculation will be broken up into +two passes (running on a single GPU). + +Extrapolation grade +""""""""""""""""""" + +Calculation of the extrapolation grade in PACE is implemented in `pair_style +pace/extrapolation`. It is based on the MaxVol algorithm similar to +Moment Tensor Potential (MTP) by Shapeev et al. and is described in +:ref:`(Lysogorskiy2) `. In order to compute the +extrapolation grade one needs to provide: + +#. ACE potential in B-basis form (`.yaml` format) and +#. Active Set Inverted (ASI) file for corresponding potential (`.asi` format) + +Calculation of extrapolation grades requires matrix-vector +multiplication for each atom and is slower than the usual `pair_style +pace recursive`, therefore it is *not* computed by default. +Extrapolation grade calculation is invoked by `fix pair`, which +requests the computation of `gamma`, as shown in the example below: + +.. code-block:: LAMMPS + + pair_style pace/extrapolation + pair_coeff * * Cu.yaml Cu.asi Cu + + fix pace_gamma all pair 10 pace/extrapolation gamma 1 + + compute max_pace_gamma all reduce max f_pace_gamma + variable dump_skip equal "c_max_pace_gamma < 5" + + + dump pace_dump all custom 20 extrapolative_structures.dump id x y z f_pace_gamma + dump_modify pace_dump skip v_dump_skip + + variable max_pace_gamma equal c_max_pace_gamma + fix extreme_extrapolation all halt 10 v_max_pace_gamma > 25 + +Here the extrapolation grade gamma is computed every 10 steps and is stored +in the `f_pace_gamma` per-atom variable. The largest value of the extrapolation +grade among all atoms in a structure is reduced to the `c_max_pace_gamma` +variable.
Only if this value exceeds the extrapolation threshold of 5 will +the structure be dumped into the `extrapolative_structures.dump` file, +but not more often than every 20 steps. + +On all other steps `pair_style pace recursive` will be used. + +---------- See the :doc:`pair_coeff ` page for alternate ways to specify the path for the ACE coefficient file. @@ -90,9 +144,10 @@ specify a pair_coeff command with I != J arguments for this style. This pair style does not support the :doc:`pair_modify ` shift, table, and tail options. -This pair style does not write its information to :doc:`binary restart files `, since it is stored in potential files. Thus, you -need to re-specify the pair_style and pair_coeff commands in an input -script that reads a restart file. +This pair style does not write its information to :doc:`binary restart +files `, since it is stored in potential files. Thus, you need +to re-specify the pair_style and pair_coeff commands in an input script +that reads a restart file. This pair style can only be used via the *pair* keyword of the :doc:`run_style respa ` command. It does not support the @@ -107,19 +162,20 @@ This pair style can only be used via the *pair* keyword of the Restrictions """""""""""" -This pair style is part of the ML-PACE package. It is only enabled if LAMMPS -was built with that package. -See the :doc:`Build package ` page for more info. +This pair style is part of the ML-PACE package. It is only enabled if +LAMMPS was built with that package. See the :doc:`Build package +` page for more info. Related commands """""""""""""""" -:doc:`pair_style snap ` +:doc:`pair_style snap `, +:doc:`fix pair ` Default """"""" -recursive, chunksize = 4096 +recursive, chunksize = 4096 .. _Drautz20191: @@ -127,4 +183,8 @@ recursive, chunksize = 4096 .. _Lysogorskiy20211: -**(Lysogorskiy)** Lysogorskiy, van der Oord, Bochkarev, Menon, Rinaldi, Hammerschmidt, Mrovec, Thompson, Csanyi, Ortner, Drautz, TBD (2021).
+**(Lysogorskiy)** Lysogorskiy, van der Oord, Bochkarev, Menon, Rinaldi, Hammerschmidt, Mrovec, Thompson, Csanyi, Ortner, Drautz, npj Comp Mat, 7, 97 (2021). + +.. _Lysogorskiy2022: + +**(Lysogorskiy2022)** Lysogorskiy, Bochkarev, Mrovec, Drautz, TBS (2022). diff --git a/doc/src/pair_style.rst b/doc/src/pair_style.rst index bd047c687f..a9b97b05c0 100644 --- a/doc/src/pair_style.rst +++ b/doc/src/pair_style.rst @@ -313,6 +313,7 @@ accelerated styles exist. * :doc:`oxrna2/stk ` - * :doc:`oxrna2/xstk ` - * :doc:`pace ` - Atomic Cluster Expansion (ACE) machine-learning potential +* :doc:`pace/extrapolation ` - Atomic Cluster Expansion (ACE) machine-learning potential with extrapolation grades * :doc:`peri/eps ` - peridynamic EPS potential * :doc:`peri/lps ` - peridynamic LPS potential * :doc:`peri/pmb ` - peridynamic PMB potential diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index b87e77e861..df93ebf761 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -78,6 +78,7 @@ Alexey ali aliceblue Allinger +allocatable allocator allocators allosws @@ -599,6 +600,7 @@ Cummins Cundall cundall Curk +curv Cusentino customIDs cutbond @@ -657,6 +659,7 @@ Dcut de dE De +deallocate deallocated debye Debye @@ -691,6 +694,7 @@ dequidt Dequidt der dereference +dereferenced derekt Deresiewicz Derjagin @@ -1486,6 +1490,7 @@ interfacial interial interlayer intermolecular +interoperable Interparticle interstitials intertube @@ -2387,6 +2392,7 @@ Nmols nn nnodes npits +npj nO Nocedal nocite @@ -3163,6 +3169,7 @@ sfree Sg Shan Shanno +Shapeev shapex shapey shapez @@ -3619,6 +3626,7 @@ Universite unix unmaintained unoptimized +unordered unpadded unphysical unphysically diff --git a/examples/PACKAGES/dielectric/README b/examples/PACKAGES/dielectric/README index 1d5f30263c..ef775e479f 100644 --- a/examples/PACKAGES/dielectric/README +++ b/examples/PACKAGES/dielectric/README @@ -2,7 
+2,7 @@ This folder contains some example data and input scripts for the DIELECTRIC pack Nguyen TD, Li H, Bagchi D, Solis FJ, Olvera de la Cruz, Incorporating surface polarization effects into large-scale coarse-grained molecular dynamics simulation, Computer Physics Communications 2019, 241, 80--91. -- data.confined : two point opposite charges confined between two interfaces (epsilon1=2/epsilon2=10/epsilon2=2) +- data.confined : two point opposite charges confined between two interfaces (epsilon2=2/epsilon1=10/epsilon2=2) - data.sphere : two point opposite charges outside a spherical interface (epsilon_in=1/epsilon2=10) - in.confined : read in data.confined @@ -10,7 +10,7 @@ Nguyen TD, Li H, Bagchi D, Solis FJ, Olvera de la Cruz, Incorporating surface po For "atom_style dielectric" the Atoms section in the data file contains 15 following columns: -id mol type q x y z normx normy normz area_per_patch ed em epsilon curvature +id mol type q x y z normx normy normz area/patch ed em epsilon curvature where @@ -34,9 +34,13 @@ where For interface particles, epsilon is set to be em (the mean dielectric value above). -* area_per_patch: the surface area of the patch (element). +* area/patch: the surface area of the patch (element). For real charges, this value is irrelevant, can be 1.0. * curvature: surface mean curvature at the patch. For example, for spherical interfaces, curvature = 1/spherical radius. For planar interfaces, curvature = 0. + +Note that the properties normx, normy, normz, area/patch, ed, em, and curvature are not +used for the non-interface beads. epsilon is used to scale the charge of any non-interface +ion, see the documentation for pair styles with the dielectric suffix and fix polarize. 
diff --git a/examples/PACKAGES/dielectric/in.confined b/examples/PACKAGES/dielectric/in.confined index beb5b9a2b0..0f9dab7bba 100644 --- a/examples/PACKAGES/dielectric/in.confined +++ b/examples/PACKAGES/dielectric/in.confined @@ -7,7 +7,7 @@ # Dielectric constants can be set to be different from the input data file variable epsilon1 index 20 -variable epsilon2 index 8 +variable epsilon2 index 10 variable data index data.confined diff --git a/fortran/README b/fortran/README index 57d163e197..6a19cd7dc2 100644 --- a/fortran/README +++ b/fortran/README @@ -1,9 +1,9 @@ -This directory contains Fortran code which interface LAMMPS as a library -and allows the LAMMPS library interface to be invoked from Fortran codes. -It requires a Fortran compiler that supports the Fortran 2003 standard. +This directory contains Fortran code that acts as an interface to LAMMPS as a +library and allows the LAMMPS library interface to be invoked from Fortran +code. It requires a Fortran compiler that supports the Fortran 2003 standard. This interface is based on and supersedes the previous Fortran interfaces -in the examples/COUPLE/fortran* folders, but is fully supported by the +in the examples/COUPLE/fortran* folders, but it is fully supported by the LAMMPS developers and included in the documentation and unit testing. Details on this Fortran interface and how to build programs using it diff --git a/fortran/lammps.f90 b/fortran/lammps.f90 index 98378c833a..3355db5209 100644 --- a/fortran/lammps.f90 +++ b/fortran/lammps.f90 @@ -29,9 +29,9 @@ ! MODULE LIBLAMMPS - USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_ptr, c_null_ptr, c_loc, & - c_int, c_int64_t, c_char, c_null_char, c_double, c_size_t, c_f_pointer - USE, INTRINSIC :: ISO_FORTRAN_ENV, ONLY : ERROR_UNIT + USE, INTRINSIC :: ISO_C_BINDING, ONLY: c_ptr, c_null_ptr, C_ASSOCIATED, & + C_LOC, c_int, c_int64_t, c_char, c_null_char, c_double, c_size_t, & + C_F_POINTER IMPLICIT NONE PRIVATE @@ -42,35 +42,107 @@ MODULE LIBLAMMPS ! 
Must be kept in sync with the equivalent declarations in ! src/library.h and python/lammps/constants.py ! - ! NOT part of the API (the part the user sees) - INTEGER (c_int), PARAMETER :: & - LAMMPS_INT = 0, & ! 32-bit integer (array) - LAMMPS_INT_2D = 1, & ! two-dimensional 32-bit integer array - LAMMPS_DOUBLE = 2, & ! 64-bit double (array) - LAMMPS_DOUBLE_2D = 3, & ! two-dimensional 64-bit double array - LAMMPS_INT64 = 4, & ! 64-bit integer (array) - LAMMPS_INT64_2D = 5, & ! two-dimensional 64-bit integer array - LAMMPS_STRING = 6 ! C-String + ! These are NOT part of the API (the part the user sees) + INTEGER(c_int), PARAMETER :: & + LAMMPS_INT = 0, & ! 32-bit integer (array) + LAMMPS_INT_2D = 1, & ! two-dimensional 32-bit integer array + LAMMPS_DOUBLE = 2, & ! 64-bit double (array) + LAMMPS_DOUBLE_2D = 3, & ! two-dimensional 64-bit double array + LAMMPS_INT64 = 4, & ! 64-bit integer (array) + LAMMPS_INT64_2D = 5, & ! two-dimensional 64-bit integer array + LAMMPS_STRING = 6, & ! C-String + LMP_STYLE_GLOBAL = 0, & ! request global compute/fix/etc. data + LMP_STYLE_ATOM = 1, & ! request per-atom compute/fix/etc. data + LMP_STYLE_LOCAL = 2, & ! request local compute/fix/etc. data + LMP_TYPE_SCALAR = 0, & ! request scalar + LMP_TYPE_VECTOR = 1, & ! request vector + LMP_TYPE_ARRAY = 2, & ! request array + LMP_SIZE_VECTOR = 3, & ! request size of vector + LMP_SIZE_ROWS = 4, & ! request rows (actually columns) + LMP_SIZE_COLS = 5, & ! request columns (actually rows) + LMP_ERROR_WARNING = 0, & ! call Error::warning() + LMP_ERROR_ONE = 1, & ! call Error::one() (from this MPI rank) + LMP_ERROR_ALL = 2, & ! call Error::all() (from all MPI ranks) + LMP_ERROR_WORLD = 4, & ! error on comm->world + LMP_ERROR_UNIVERSE = 8, & ! error on comm->universe + LMP_VAR_EQUAL = 0, & ! equal-style variables (and compatible) + LMP_VAR_ATOM = 1, & ! atom-style variables + LMP_VAR_VECTOR = 2, & ! vector variables + LMP_VAR_STRING = 3 ! string variables (everything else) + + !
"Constants" to use with extract_compute and friends + TYPE lammps_style + INTEGER(c_int) :: global, atom, local + END TYPE lammps_style + + TYPE lammps_type + INTEGER(c_int) :: scalar, vector, array + END TYPE lammps_type TYPE lammps - TYPE(c_ptr) :: handle - CONTAINS - PROCEDURE :: close => lmp_close - PROCEDURE :: error => lmp_error - PROCEDURE :: file => lmp_file - PROCEDURE :: command => lmp_command - PROCEDURE :: commands_list => lmp_commands_list - PROCEDURE :: commands_string => lmp_commands_string - PROCEDURE :: get_natoms => lmp_get_natoms - PROCEDURE :: get_thermo => lmp_get_thermo - PROCEDURE :: extract_box => lmp_extract_box - PROCEDURE :: reset_box => lmp_reset_box - PROCEDURE :: memory_usage => lmp_memory_usage - PROCEDURE :: get_mpi_comm => lmp_get_mpi_comm - PROCEDURE :: extract_setting => lmp_extract_setting - PROCEDURE :: extract_global => lmp_extract_global - PROCEDURE :: version => lmp_version - PROCEDURE :: is_running => lmp_is_running + TYPE(c_ptr) :: handle = c_null_ptr + TYPE(lammps_style) :: style + TYPE(lammps_type) :: type + CONTAINS + PROCEDURE :: close => lmp_close + PROCEDURE :: error => lmp_error + PROCEDURE :: file => lmp_file + PROCEDURE :: command => lmp_command + PROCEDURE :: commands_list => lmp_commands_list + PROCEDURE :: commands_string => lmp_commands_string + PROCEDURE :: get_natoms => lmp_get_natoms + PROCEDURE :: get_thermo => lmp_get_thermo + PROCEDURE :: extract_box => lmp_extract_box + PROCEDURE :: reset_box => lmp_reset_box + PROCEDURE :: memory_usage => lmp_memory_usage + PROCEDURE :: get_mpi_comm => lmp_get_mpi_comm + PROCEDURE :: extract_setting => lmp_extract_setting + PROCEDURE :: extract_global => lmp_extract_global + PROCEDURE :: extract_atom => lmp_extract_atom + PROCEDURE :: extract_compute => lmp_extract_compute + PROCEDURE :: extract_fix => lmp_extract_fix + PROCEDURE :: extract_variable => lmp_extract_variable + PROCEDURE :: set_variable => lmp_set_variable + PROCEDURE, PRIVATE :: lmp_gather_atoms_int + 
PROCEDURE, PRIVATE :: lmp_gather_atoms_double + GENERIC :: gather_atoms => lmp_gather_atoms_int, & + lmp_gather_atoms_double + PROCEDURE, PRIVATE :: lmp_gather_atoms_concat_int + PROCEDURE, PRIVATE :: lmp_gather_atoms_concat_double + GENERIC :: gather_atoms_concat => lmp_gather_atoms_concat_int, & + lmp_gather_atoms_concat_double + PROCEDURE, PRIVATE :: lmp_gather_atoms_subset_int + PROCEDURE, PRIVATE :: lmp_gather_atoms_subset_double + GENERIC :: gather_atoms_subset => lmp_gather_atoms_subset_int, & + lmp_gather_atoms_subset_double + PROCEDURE, PRIVATE :: lmp_scatter_atoms_int + PROCEDURE, PRIVATE :: lmp_scatter_atoms_double + GENERIC :: scatter_atoms => lmp_scatter_atoms_int, & + lmp_scatter_atoms_double +! + PROCEDURE, PRIVATE :: lmp_scatter_atoms_subset_int + PROCEDURE, PRIVATE :: lmp_scatter_atoms_subset_double + GENERIC :: scatter_atoms_subset => lmp_scatter_atoms_subset_int, & + lmp_scatter_atoms_subset_double + PROCEDURE :: version => lmp_version + PROCEDURE,NOPASS :: get_os_info => lmp_get_os_info + PROCEDURE,NOPASS :: config_has_mpi_support => lmp_config_has_mpi_support + PROCEDURE,NOPASS :: config_has_gzip_support => lmp_config_has_gzip_support + PROCEDURE,NOPASS :: config_has_png_support => lmp_config_has_png_support + PROCEDURE,NOPASS :: config_has_jpeg_support => lmp_config_has_jpeg_support + PROCEDURE,NOPASS :: config_has_ffmpeg_support & + => lmp_config_has_ffmpeg_support + PROCEDURE,NOPASS :: config_has_exceptions => lmp_config_has_exceptions + PROCEDURE,NOPASS :: config_has_package => lmp_config_has_package + PROCEDURE,NOPASS :: config_package_count => lammps_config_package_count + PROCEDURE,NOPASS :: config_package_name => lmp_config_package_name + PROCEDURE,NOPASS :: installed_packages => lmp_installed_packages +! 
+ PROCEDURE :: flush_buffers => lmp_flush_buffers + PROCEDURE :: is_running => lmp_is_running + PROCEDURE :: force_timeout => lmp_force_timeout + PROCEDURE :: has_error => lmp_has_error + PROCEDURE :: get_last_error_message => lmp_get_last_error_message END TYPE lammps INTERFACE lammps @@ -85,30 +157,66 @@ MODULE LIBLAMMPS ENUMERATOR :: DATA_STRING END ENUM + ! Base class for receiving LAMMPS data (to reduce code duplication) + TYPE lammps_data_baseclass + INTEGER(c_int) :: datatype = -1_c_int + ! in case we need to call the Error class in an assignment + CLASS(lammps), POINTER, PRIVATE :: lammps_instance => NULL() + END TYPE lammps_data_baseclass + ! Derived type for receiving LAMMPS data (in lieu of the ability to type cast - ! pointers) - TYPE lammps_data - INTEGER(c_int) :: datatype - INTEGER(c_int), POINTER :: i32 - INTEGER(c_int), DIMENSION(:), POINTER :: i32_vec - INTEGER(c_int64_t), POINTER :: i64 - INTEGER(c_int64_t), DIMENSION(:), POINTER :: i64_vec - REAL(c_double), POINTER :: r64 - REAL(c_double), DIMENSION(:), POINTER :: r64_vec + ! pointers). Used for extract_compute, extract_atom + TYPE, EXTENDS(lammps_data_baseclass) :: lammps_data + INTEGER(c_int), POINTER :: i32 => NULL() + INTEGER(c_int), DIMENSION(:), POINTER :: i32_vec => NULL() + INTEGER(c_int64_t), POINTER :: i64 => NULL() + INTEGER(c_int64_t), DIMENSION(:), POINTER :: i64_vec => NULL() + REAL(c_double), POINTER :: r64 => NULL() + REAL(c_double), DIMENSION(:), POINTER :: r64_vec => NULL() + REAL(c_double), DIMENSION(:,:), POINTER :: r64_mat => NULL() CHARACTER(LEN=:), ALLOCATABLE :: str END TYPE lammps_data + ! Derived type for holding LAMMPS fix data + ! Done this way because fix global data are not pointers, but computed + ! on-the-fly, whereas per-atom and local data are pointers to the actual + ! array. Doing it this way saves the user from having to explicitly + ! deallocate all of the pointers. 
+ TYPE, EXTENDS(lammps_data_baseclass) :: lammps_fix_data + REAL(c_double) :: r64 + REAL(c_double), DIMENSION(:), POINTER :: r64_vec => NULL() + REAL(c_double), DIMENSION(:,:), POINTER :: r64_mat => NULL() + END TYPE lammps_fix_data + + ! Derived type for holding LAMMPS variable data + ! Done this way because extract_variable calculates variable values, it does + ! not return pointers to LAMMPS data. + TYPE, EXTENDS(lammps_data_baseclass) :: lammps_variable_data + REAL(c_double) :: r64 + REAL(c_double), DIMENSION(:), ALLOCATABLE :: r64_vec + CHARACTER(LEN=:), ALLOCATABLE :: str + END TYPE lammps_variable_data + ! This overloads the assignment operator (=) so that assignments of the ! form ! nlocal = extract_global('nlocal') ! which are of the form "pointer to double = type(lammps_data)" result in ! re-associating the pointer on the left with the appropriate piece of - ! LAMMPS data (after checking type-compatibility) + ! LAMMPS data (after checking type-kind-rank compatibility) INTERFACE ASSIGNMENT(=) MODULE PROCEDURE assign_int_to_lammps_data, assign_int64_to_lammps_data, & - assign_intvec_to_lammps_data, & + assign_intvec_to_lammps_data, assign_int64vec_to_lammps_data, & assign_double_to_lammps_data, assign_doublevec_to_lammps_data, & + assign_doublemat_to_lammps_data, & assign_string_to_lammps_data + ! We handle fix data (slightly) differently + MODULE PROCEDURE assign_double_to_lammps_fix_data, & + assign_doublevec_to_lammps_fix_data, & + assign_doublemat_to_lammps_fix_data + ! Variables, too + MODULE PROCEDURE assign_double_to_lammps_variable_data, & + assign_doublevec_to_lammps_variable_data, & + assign_string_to_lammps_variable_data END INTERFACE ! 
interface definitions for calling functions in library.cpp @@ -163,29 +271,29 @@ MODULE LIBLAMMPS SUBROUTINE lammps_command(handle, cmd) BIND(C) IMPORT :: c_ptr IMPLICIT NONE - TYPE(c_ptr), VALUE :: handle - TYPE(c_ptr), VALUE :: cmd + TYPE(c_ptr), INTENT(IN), VALUE :: handle + TYPE(c_ptr), INTENT(IN), VALUE :: cmd END SUBROUTINE lammps_command SUBROUTINE lammps_commands_list(handle, ncmd, cmds) BIND(C) IMPORT :: c_ptr, c_int IMPLICIT NONE - TYPE(c_ptr), VALUE :: handle - INTEGER(c_int), VALUE, INTENT(IN) :: ncmd + TYPE(c_ptr), INTENT(IN), VALUE :: handle + INTEGER(c_int), INTENT(IN), VALUE :: ncmd TYPE(c_ptr), DIMENSION(*), INTENT(IN) :: cmds END SUBROUTINE lammps_commands_list SUBROUTINE lammps_commands_string(handle, str) BIND(C) IMPORT :: c_ptr IMPLICIT NONE - TYPE(c_ptr), VALUE :: handle - TYPE(c_ptr), VALUE :: str + TYPE(c_ptr), INTENT(IN), VALUE :: handle + TYPE(c_ptr), INTENT(IN), VALUE :: str END SUBROUTINE lammps_commands_string FUNCTION lammps_get_natoms(handle) BIND(C) IMPORT :: c_ptr, c_double IMPLICIT NONE - TYPE(c_ptr), VALUE :: handle + TYPE(c_ptr), INTENT(IN), VALUE :: handle REAL(c_double) :: lammps_get_natoms END FUNCTION lammps_get_natoms @@ -193,89 +301,156 @@ MODULE LIBLAMMPS IMPORT :: c_ptr, c_double IMPLICIT NONE REAL(c_double) :: lammps_get_thermo - TYPE(c_ptr), VALUE :: handle - TYPE(c_ptr), VALUE :: name + TYPE(c_ptr), INTENT(IN), VALUE :: handle + TYPE(c_ptr), INTENT(IN), VALUE :: name END FUNCTION lammps_get_thermo SUBROUTINE lammps_extract_box(handle,boxlo,boxhi,xy,yz,xz,pflags, & boxflag) BIND(C) IMPORT :: c_ptr, c_double, c_int IMPLICIT NONE - TYPE(c_ptr), VALUE :: handle, boxlo, boxhi, xy, yz, xz, pflags, & - boxflag + TYPE(c_ptr), INTENT(IN), VALUE :: handle, boxlo, boxhi, xy, yz, xz, & + pflags, boxflag END SUBROUTINE lammps_extract_box SUBROUTINE lammps_reset_box(handle,boxlo,boxhi,xy,yz,xz) BIND(C) IMPORT :: c_ptr, c_double IMPLICIT NONE - TYPE(c_ptr), VALUE :: handle - REAL(c_double), DIMENSION(3) :: boxlo, boxhi - 
REAL(c_double), VALUE :: xy, yz, xz + TYPE(c_ptr), INTENT(IN), VALUE :: handle + REAL(c_double), DIMENSION(3), INTENT(IN) :: boxlo, boxhi + REAL(c_double), INTENT(IN), VALUE :: xy, yz, xz END SUBROUTINE lammps_reset_box SUBROUTINE lammps_memory_usage(handle,meminfo) BIND(C) IMPORT :: c_ptr, c_double IMPLICIT NONE - TYPE(c_ptr), VALUE :: handle - REAL(c_double), DIMENSION(*) :: meminfo + TYPE(c_ptr), INTENT(IN), VALUE :: handle + REAL(c_double), DIMENSION(*), INTENT(OUT) :: meminfo END SUBROUTINE lammps_memory_usage FUNCTION lammps_get_mpi_comm(handle) BIND(C) IMPORT :: c_ptr, c_int IMPLICIT NONE - TYPE(c_ptr), VALUE :: handle + TYPE(c_ptr), INTENT(IN), VALUE :: handle INTEGER(c_int) :: lammps_get_mpi_comm END FUNCTION lammps_get_mpi_comm FUNCTION lammps_extract_setting(handle,keyword) BIND(C) IMPORT :: c_ptr, c_int IMPLICIT NONE - TYPE(c_ptr), VALUE :: handle, keyword + TYPE(c_ptr), INTENT(IN), VALUE :: handle, keyword INTEGER(c_int) :: lammps_extract_setting END FUNCTION lammps_extract_setting FUNCTION lammps_extract_global_datatype(handle,name) BIND(C) IMPORT :: c_ptr, c_int IMPLICIT NONE - TYPE(c_ptr), VALUE :: handle, name + TYPE(c_ptr), INTENT(IN), VALUE :: handle, name INTEGER(c_int) :: lammps_extract_global_datatype END FUNCTION lammps_extract_global_datatype - FUNCTION c_strlen (str) BIND(C,name='strlen') + FUNCTION c_strlen(str) BIND(C,name='strlen') IMPORT :: c_ptr, c_size_t IMPLICIT NONE - TYPE(c_ptr), VALUE :: str + TYPE(c_ptr), INTENT(IN), VALUE :: str INTEGER(c_size_t) :: c_strlen END FUNCTION c_strlen FUNCTION lammps_extract_global(handle, name) BIND(C) IMPORT :: c_ptr IMPLICIT NONE - TYPE(c_ptr), VALUE :: handle, name + TYPE(c_ptr), INTENT(IN), VALUE :: handle, name TYPE(c_ptr) :: lammps_extract_global END FUNCTION lammps_extract_global - !INTEGER (c_int) FUNCTION lammps_extract_atom_datatype + FUNCTION lammps_extract_atom_datatype(handle, name) BIND(C) + IMPORT :: c_ptr, c_int + IMPLICIT NONE + TYPE(c_ptr), INTENT(IN), VALUE :: handle, name + 
INTEGER(c_int) :: lammps_extract_atom_datatype + END FUNCTION lammps_extract_atom_datatype - !(generic) lammps_extract_atom + FUNCTION lammps_extract_atom(handle, name) BIND(C) + IMPORT :: c_ptr + IMPLICIT NONE + TYPE(c_ptr), INTENT(IN), VALUE :: handle, name + TYPE(c_ptr) :: lammps_extract_atom + END FUNCTION lammps_extract_atom - !(generic) lammps_extract_compute + FUNCTION lammps_extract_compute(handle, id, style, type) BIND(C) + IMPORT :: c_ptr, c_int + IMPLICIT NONE + TYPE(c_ptr), INTENT(IN), VALUE :: handle, id + INTEGER(c_int), INTENT(IN), VALUE :: style, type + TYPE(c_ptr) :: lammps_extract_compute + END FUNCTION lammps_extract_compute - !(generic) lammps_extract_fix + FUNCTION lammps_extract_fix(handle, id, style, type, nrow, ncol) BIND(C) + IMPORT :: c_ptr, c_int + IMPLICIT NONE + TYPE(c_ptr), INTENT(IN), VALUE :: handle, id + INTEGER(c_int), INTENT(IN), VALUE :: style, type, nrow, ncol + TYPE(c_ptr) :: lammps_extract_fix + END FUNCTION lammps_extract_fix - !(generic) lammps_extract_variable + FUNCTION lammps_extract_variable_datatype(handle,name) BIND(C) + IMPORT :: c_ptr, c_int + IMPLICIT NONE + TYPE(c_ptr), INTENT(IN), VALUE :: handle, name + INTEGER(c_int) :: lammps_extract_variable_datatype + END FUNCTION lammps_extract_variable_datatype - !INTEGER (c_int) lammps_set_variable + FUNCTION lammps_extract_variable(handle, name, group) BIND(C) + IMPORT :: c_ptr, c_int + IMPLICIT NONE + TYPE(c_ptr), INTENT(IN), VALUE :: handle, name, group + TYPE(c_ptr) :: lammps_extract_variable + END FUNCTION lammps_extract_variable - !SUBROUTINE lammps_gather_atoms + FUNCTION lammps_set_variable(handle, name, str) BIND(C) + IMPORT :: c_int, c_ptr + IMPLICIT NONE + TYPE(c_ptr), VALUE :: handle, name, str + INTEGER(c_int) :: lammps_set_variable + END FUNCTION lammps_set_variable - !SUBROUTINE lammps_gather_atoms_concat + SUBROUTINE lammps_gather_atoms(handle, name, type, count, data) BIND(C) + IMPORT :: c_int, c_ptr + IMPLICIT NONE + TYPE(c_ptr), VALUE :: handle, name, 
data + INTEGER(c_int), VALUE :: type, count + END SUBROUTINE lammps_gather_atoms - !SUBROUTINE lammps_gather_atoms_subset + SUBROUTINE lammps_gather_atoms_concat(handle, name, type, count, data) & + BIND(C) + IMPORT :: c_ptr, c_int + IMPLICIT NONE + TYPE(c_ptr), VALUE :: handle, name, data + INTEGER(c_int), VALUE :: type, count + END SUBROUTINE lammps_gather_atoms_concat - !SUBROUTINE lammps_scatter_atoms + SUBROUTINE lammps_gather_atoms_subset(handle, name, type, count, ndata, & + ids, data) BIND(C) + IMPORT :: c_ptr, c_int + IMPLICIT NONE + TYPE(c_ptr), VALUE :: handle, name, ids, data + INTEGER(c_int), VALUE :: type, count, ndata + END SUBROUTINE lammps_gather_atoms_subset - !SUBROUTINE lammps_scatter_atoms_subset + SUBROUTINE lammps_scatter_atoms(handle, name, type, count, data) BIND(C) + IMPORT :: c_ptr, c_int + IMPLICIT NONE + TYPE(c_ptr), VALUE :: handle, name, data + INTEGER(c_int), VALUE :: type, count + END SUBROUTINE lammps_scatter_atoms + + SUBROUTINE lammps_scatter_atoms_subset(handle, name, type, count, & + ndata, ids, data) BIND(C) + IMPORT :: c_ptr, c_int + IMPLICIT NONE + TYPE(c_ptr), VALUE :: handle, name, ids, data + INTEGER(c_int), VALUE :: count, ndata, type + END SUBROUTINE lammps_scatter_atoms_subset !SUBROUTINE lammps_gather_bonds @@ -288,15 +463,15 @@ MODULE LIBLAMMPS !SUBROUTINE lammps_scatter_subset !(generic / id, type, and image are special) / requires LAMMPS_BIGBIG - !INTEGER (C_int) FUNCTION lammps_create_atoms + !INTEGER(c_int) FUNCTION lammps_create_atoms - !INTEGER (C_int) FUNCTION lammps_find_pair_neighlist + !INTEGER(c_int) FUNCTION lammps_find_pair_neighlist - !INTEGER (C_int) FUNCTION lammps_find_fix_neighlist + !INTEGER(c_int) FUNCTION lammps_find_fix_neighlist - !INTEGER (C_int) FUNCTION lammps_find_compute_neighlist + !INTEGER(c_int) FUNCTION lammps_find_compute_neighlist - !INTEGER (C_int) FUNCTION lammps_neighlist_num_elements + !INTEGER(c_int) FUNCTION lammps_neighlist_num_elements !SUBROUTINE 
lammps_neighlist_element_neighbors @@ -307,35 +482,87 @@ MODULE LIBLAMMPS INTEGER(c_int) :: lammps_version END FUNCTION lammps_version - !SUBROUTINE lammps_get_os_info + SUBROUTINE lammps_get_os_info(buffer, buf_size) BIND(C) + IMPORT :: c_ptr, c_int + IMPLICIT NONE + TYPE(c_ptr), VALUE :: buffer + INTEGER(c_int), VALUE :: buf_size + END SUBROUTINE lammps_get_os_info - !LOGICAL FUNCTION lammps_config_has_mpi_support - !LOGICAL FUNCTION lammps_config_has_gzip_support - !LOGICAL FUNCTION lammps_config_has_png_support - !LOGICAL FUNCTION lammps_config_has_jpeg_support - !LOGICAL FUNCTION lammps_config_has_ffmpeg_support - !LOGICAL FUNCTION lammps_config_has_exceptions - !LOGICAL FUNCTION lammps_config_has_package - !INTEGER (C_int) FUNCTION lammps_config_package_count - !SUBROUTINE lammps_config_package_name + FUNCTION lammps_config_has_mpi_support() BIND(C) + IMPORT :: c_int + IMPLICIT NONE + INTEGER(c_int) :: lammps_config_has_mpi_support + END FUNCTION lammps_config_has_mpi_support + + FUNCTION lammps_config_has_gzip_support() BIND(C) + IMPORT :: c_int + IMPLICIT NONE + INTEGER(c_int) :: lammps_config_has_gzip_support + END FUNCTION lammps_config_has_gzip_support + + FUNCTION lammps_config_has_png_support() BIND(C) + IMPORT :: c_int + IMPLICIT NONE + INTEGER(c_int) :: lammps_config_has_png_support + END FUNCTION lammps_config_has_png_support + + FUNCTION lammps_config_has_jpeg_support() BIND(C) + IMPORT :: c_int + IMPLICIT NONE + INTEGER(c_int) :: lammps_config_has_jpeg_support + END FUNCTION lammps_config_has_jpeg_support + + FUNCTION lammps_config_has_ffmpeg_support() BIND(C) + IMPORT :: c_int + IMPLICIT NONE + INTEGER(c_int) :: lammps_config_has_ffmpeg_support + END FUNCTION lammps_config_has_ffmpeg_support + + FUNCTION lammps_config_has_exceptions() BIND(C) + IMPORT :: c_int + IMPLICIT NONE + INTEGER(c_int) :: lammps_config_has_exceptions + END FUNCTION lammps_config_has_exceptions + + FUNCTION lammps_config_has_package(name) BIND(C) + IMPORT :: c_int, c_ptr + 
IMPLICIT NONE + TYPE(c_ptr), VALUE :: name + INTEGER(c_int) :: lammps_config_has_package + END FUNCTION lammps_config_has_package + + FUNCTION lammps_config_package_count() BIND(C) + IMPORT :: c_int + IMPLICIT NONE + INTEGER(c_int) :: lammps_config_package_count + END FUNCTION lammps_config_package_count + + FUNCTION lammps_config_package_name(idx, buffer, buf_size) BIND(C) + IMPORT :: c_int, c_ptr + IMPLICIT NONE + INTEGER(c_int) :: lammps_config_package_name + INTEGER(c_int), VALUE :: idx, buf_size + TYPE(c_ptr), VALUE :: buffer + END FUNCTION lammps_config_package_name !LOGICAL FUNCTION lammps_config_accelerator !LOGICAL FUNCTION lammps_has_gpu_device !SUBROUTINE lammps_get_gpu_device !LOGICAL FUNCTION lammps_has_id - !INTEGER (C_int) FUNCTION lammps_id_count + !INTEGER(c_int) FUNCTION lammps_id_count !SUBROUTINE lammps_id_name - !INTEGER (C_int) FUNCTION lammps_plugin_count + !INTEGER(c_int) FUNCTION lammps_plugin_count !SUBROUTINE lammps_plugin_name !Both of these use LAMMPS_BIGBIG - !INTEGER (LAMMPS_imageint) FUNCTION lammps_encode_image_flags + !INTEGER(LAMMPS_imageint) FUNCTION lammps_encode_image_flags !SUBROUTINE lammps_decode_image_flags !SUBROUTINE lammps_set_fix_external_callback ! may have trouble.... - !FUNCTION lammps_fix_external_get_force() ! returns real(c_double) (:) + !FUNCTION lammps_fix_external_get_force() ! 
returns real(c_double)(:) !SUBROUTINE lammps_fix_external_set_energy_global !SUBROUTINE lammps_fix_external_set_energy_peratom @@ -344,7 +571,11 @@ MODULE LIBLAMMPS !SUBROUTINE lammps_fix_external_set_vector_length !SUBROUTINE lammps_fix_external_set_vector - !SUBROUTINE lammps_flush_buffers + SUBROUTINE lammps_flush_buffers(handle) BIND(C) + IMPORT :: c_ptr + IMPLICIT NONE + TYPE(c_ptr), VALUE :: handle + END SUBROUTINE lammps_flush_buffers FUNCTION lammps_malloc(size) BIND(C, name='malloc') IMPORT :: c_ptr, c_size_t @@ -365,11 +596,25 @@ MODULE LIBLAMMPS TYPE(c_ptr), VALUE :: handle END FUNCTION lammps_is_running - !SUBROUTINE lammps_force_timeout + SUBROUTINE lammps_force_timeout(handle) BIND(C) + IMPORT :: c_ptr + IMPLICIT NONE + TYPE(c_ptr), VALUE :: handle + END SUBROUTINE lammps_force_timeout - !LOGICAL FUNCTION lammps_has_error + INTEGER(c_int) FUNCTION lammps_has_error(handle) BIND(C) + IMPORT :: c_ptr, c_int + IMPLICIT NONE + TYPE(c_ptr), VALUE :: handle + END FUNCTION lammps_has_error - !INTEGER (c_int) FUNCTION lammps_get_last_error_message + INTEGER(c_int) FUNCTION lammps_get_last_error_message & + (handle, buffer, buf_size) BIND(C) + IMPORT :: c_ptr, c_int, c_char + IMPLICIT NONE + TYPE(c_ptr), VALUE :: handle, buffer + INTEGER(c_int), VALUE :: buf_size + END FUNCTION lammps_get_last_error_message END INTERFACE @@ -409,11 +654,19 @@ CONTAINS CALL lammps_free(argv(i)) END DO DEALLOCATE(argv) + + ! Assign style and type members so lmp_open%style%global and such work + lmp_open%style%global = LMP_STYLE_GLOBAL + lmp_open%style%atom = LMP_STYLE_ATOM + lmp_open%style%local = LMP_STYLE_LOCAL + lmp_open%type%scalar = LMP_TYPE_SCALAR + lmp_open%type%vector = LMP_TYPE_VECTOR + lmp_open%type%array = LMP_TYPE_ARRAY END FUNCTION lmp_open ! 
Combined Fortran wrapper around lammps_close() and lammps_mpi_finalize() SUBROUTINE lmp_close(self, finalize) - CLASS(lammps) :: self + CLASS(lammps), INTENT(IN) :: self LOGICAL, INTENT(IN), OPTIONAL :: finalize CALL lammps_close(self%handle) @@ -440,7 +693,7 @@ CONTAINS ! equivalent function to lammps_file() SUBROUTINE lmp_file(self, filename) - CLASS(lammps) :: self + CLASS(lammps), INTENT(IN) :: self CHARACTER(len=*) :: filename TYPE(c_ptr) :: str @@ -451,7 +704,7 @@ CONTAINS ! equivalent function to lammps_command() SUBROUTINE lmp_command(self, cmd) - CLASS(lammps) :: self + CLASS(lammps), INTENT(IN) :: self CHARACTER(len=*) :: cmd TYPE(c_ptr) :: str @@ -462,7 +715,7 @@ CONTAINS ! equivalent function to lammps_commands_list() SUBROUTINE lmp_commands_list(self, cmds) - CLASS(lammps) :: self + CLASS(lammps), INTENT(IN) :: self CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: cmds(:) TYPE(c_ptr), ALLOCATABLE :: cmdv(:) INTEGER :: i, ncmd @@ -485,7 +738,7 @@ CONTAINS ! equivalent function to lammps_commands_string() SUBROUTINE lmp_commands_string(self, str) - CLASS(lammps) :: self + CLASS(lammps), INTENT(IN) :: self CHARACTER(len=*) :: str TYPE(c_ptr) :: tmp @@ -495,17 +748,17 @@ CONTAINS END SUBROUTINE lmp_commands_string ! equivalent function to lammps_get_natoms - DOUBLE PRECISION FUNCTION lmp_get_natoms(self) + REAL(c_double) FUNCTION lmp_get_natoms(self) CLASS(lammps) :: self lmp_get_natoms = lammps_get_natoms(self%handle) END FUNCTION lmp_get_natoms ! 
equivalent function to lammps_get_thermo - REAL (C_double) FUNCTION lmp_get_thermo(self,name) + REAL(c_double) FUNCTION lmp_get_thermo(self,name) CLASS(lammps), INTENT(IN) :: self CHARACTER(LEN=*) :: name - TYPE(C_ptr) :: Cname + TYPE(c_ptr) :: Cname Cname = f2c_string(name) lmp_get_thermo = lammps_get_thermo(self%handle, Cname) @@ -518,27 +771,27 @@ CONTAINS REAL(c_double), INTENT(OUT), TARGET, OPTIONAL :: boxlo(3), boxhi(3) REAL(c_double), INTENT(OUT), TARGET, OPTIONAL :: xy, yz, xz LOGICAL, INTENT(OUT), OPTIONAL :: pflags(3), boxflag - INTEGER(c_int), TARGET :: C_pflags(3), C_boxflag - TYPE (c_ptr) :: ptr(7) + INTEGER(c_int), TARGET :: c_pflags(3), c_boxflag + TYPE(c_ptr) :: ptr(7) ptr = c_null_ptr - IF ( PRESENT(boxlo) ) ptr(1) = C_LOC(boxlo(1)) - IF ( PRESENT(boxhi) ) ptr(2) = C_LOC(boxhi(1)) - IF ( PRESENT(xy) ) ptr(3) = C_LOC(xy) - IF ( PRESENT(yz) ) ptr(4) = C_LOC(yz) - IF ( PRESENT(xz) ) ptr(5) = C_LOC(xz) - IF ( PRESENT(pflags) ) ptr(6) = C_LOC(C_pflags(1)) - IF ( PRESENT(boxflag) ) ptr(7) = C_LOC(C_boxflag) + IF (PRESENT(boxlo)) ptr(1) = C_LOC(boxlo(1)) + IF (PRESENT(boxhi)) ptr(2) = C_LOC(boxhi(1)) + IF (PRESENT(xy)) ptr(3) = C_LOC(xy) + IF (PRESENT(yz)) ptr(4) = C_LOC(yz) + IF (PRESENT(xz)) ptr(5) = C_LOC(xz) + IF (PRESENT(pflags)) ptr(6) = C_LOC(c_pflags(1)) + IF (PRESENT(boxflag)) ptr(7) = C_LOC(c_boxflag) CALL lammps_extract_box(self%handle, ptr(1), ptr(2), ptr(3), ptr(4), & ptr(5), ptr(6), ptr(7)) - IF ( PRESENT(pflags) ) pflags = ( C_pflags /= 0_C_int ) - IF ( PRESENT(boxflag) ) boxflag = ( C_boxflag /= 0_C_int ) + IF (PRESENT(pflags)) pflags = (c_pflags /= 0_c_int) + IF (PRESENT(boxflag)) boxflag = (c_boxflag /= 0_c_int) END SUBROUTINE lmp_extract_box ! 
equivalent function to lammps_reset_box SUBROUTINE lmp_reset_box(self, boxlo, boxhi, xy, yz, xz) CLASS(lammps), INTENT(IN) :: self - REAL(C_double), INTENT(IN) :: boxlo(3), boxhi(3), xy, yz, xz + REAL(c_double), INTENT(IN) :: boxlo(3), boxhi(3), xy, yz, xz CALL lammps_reset_box(self%handle, boxlo, boxhi, xy, yz, xz) END SUBROUTINE lmp_reset_box @@ -547,7 +800,7 @@ CONTAINS SUBROUTINE lmp_memory_usage(self,meminfo) CLASS(lammps), INTENT(IN) :: self INTEGER, PARAMETER :: MEMINFO_ELEM = 3 - REAL (c_double), DIMENSION(MEMINFO_ELEM), INTENT(OUT) :: meminfo + REAL(c_double), DIMENSION(MEMINFO_ELEM), INTENT(OUT) :: meminfo CALL lammps_memory_usage(self%handle,meminfo) END SUBROUTINE lmp_memory_usage @@ -560,7 +813,7 @@ CONTAINS END FUNCTION lmp_get_mpi_comm ! equivalent function to lammps_extract_setting - INTEGER (c_int) FUNCTION lmp_extract_setting(self, keyword) + INTEGER(c_int) FUNCTION lmp_extract_setting(self, keyword) CLASS(lammps), INTENT(IN) :: self CHARACTER(LEN=*), INTENT(IN) :: keyword TYPE(c_ptr) :: Ckeyword @@ -573,8 +826,8 @@ CONTAINS ! equivalent function to lammps_extract_global ! the assignment is actually overloaded so as to bind the pointers to ! 
lammps data based on the information available from LAMMPS - FUNCTION lmp_extract_global(self, name) RESULT (global_data) - CLASS(lammps), INTENT(IN) :: self + FUNCTION lmp_extract_global(self, name) RESULT(global_data) + CLASS(lammps), INTENT(IN), TARGET :: self CHARACTER(LEN=*), INTENT(IN) :: name TYPE(lammps_data) :: global_data @@ -601,9 +854,10 @@ CONTAINS Cptr = lammps_extract_global(self%handle, Cname) CALL lammps_free(Cname) + global_data%lammps_instance => self SELECT CASE (datatype) CASE (LAMMPS_INT) - IF ( length == 1 ) THEN + IF (length == 1) THEN global_data%datatype = DATA_INT CALL C_F_POINTER(Cptr, global_data%i32) ELSE @@ -611,7 +865,7 @@ CONTAINS CALL C_F_POINTER(Cptr, global_data%i32_vec, [length]) END IF CASE (LAMMPS_INT64) - IF ( length == 1 ) THEN + IF (length == 1) THEN global_data%datatype = DATA_INT64 CALL C_F_POINTER(Cptr, global_data%i64) ELSE @@ -619,7 +873,7 @@ CONTAINS CALL C_F_POINTER(Cptr, global_data%i64_vec, [length]) END IF CASE (LAMMPS_DOUBLE) - IF ( length == 1 ) THEN + IF (length == 1) THEN global_data%datatype = DATA_DOUBLE CALL C_F_POINTER(Cptr, global_data%r64) ELSE @@ -630,129 +884,1052 @@ CONTAINS global_data%datatype = DATA_STRING length = c_strlen(Cptr) CALL C_F_POINTER(Cptr, Fptr, [length]) - ALLOCATE ( CHARACTER(LEN=length) :: global_data%str ) - FORALL ( I=1:length ) + ALLOCATE(CHARACTER(LEN=length) :: global_data%str) + DO i = 1, length global_data%str(i:i) = Fptr(i) - END FORALL - CASE DEFAULT - ! FIXME convert to use symbolic constants later - CALL lmp_error(self, 6, 'Unknown pointer type in extract_global') + END DO + CASE DEFAULT + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'Unknown pointer type in extract_global') END SELECT END FUNCTION - ! equivalent function to lammps_version() + ! equivalent function to lammps_extract_atom + ! the assignment is actually overloaded so as to bind the pointers to + ! 
lammps data based on the information available from LAMMPS + FUNCTION lmp_extract_atom(self, name) RESULT(peratom_data) + CLASS(lammps), INTENT(IN), TARGET :: self + CHARACTER(LEN=*), INTENT(IN) :: name + TYPE(lammps_data) :: peratom_data + + INTEGER(c_int) :: datatype + TYPE(c_ptr) :: Cname, Cptr + INTEGER(c_int) :: ntypes, nmax + INTEGER :: nrows, ncols + REAL(c_double), DIMENSION(:), POINTER :: dummy + TYPE(c_ptr), DIMENSION(:), POINTER :: Catomptr + CHARACTER(LEN=:), ALLOCATABLE :: error_msg + + nmax = lmp_extract_setting(self, 'nmax') + ntypes = lmp_extract_setting(self, 'ntypes') + Cname = f2c_string(name) + datatype = lammps_extract_atom_datatype(self%handle, Cname) + Cptr = lammps_extract_atom(self%handle, Cname) + CALL lammps_free(Cname) + + SELECT CASE (name) + CASE ('mass') + ncols = ntypes + 1 + nrows = 1 + CASE ('x','v','f','mu','omega','torque','angmom') + ncols = nmax + nrows = 3 + CASE DEFAULT + ncols = nmax + nrows = 1 + END SELECT + + peratom_data%lammps_instance => self + SELECT CASE (datatype) + CASE (LAMMPS_INT) + peratom_data%datatype = DATA_INT_1D + CALL C_F_POINTER(Cptr, peratom_data%i32_vec, [ncols]) + CASE (LAMMPS_INT64) + peratom_data%datatype = DATA_INT64_1D + CALL C_F_POINTER(Cptr, peratom_data%i64_vec, [ncols]) + CASE (LAMMPS_DOUBLE) + peratom_data%datatype = DATA_DOUBLE_1D + IF (name == 'mass') THEN + CALL C_F_POINTER(Cptr, dummy, [ncols]) + peratom_data%r64_vec(0:) => dummy + ELSE + CALL C_F_POINTER(Cptr, peratom_data%r64_vec, [ncols]) + END IF + CASE (LAMMPS_DOUBLE_2D) + peratom_data%datatype = DATA_DOUBLE_2D + ! First, we dereference the void** pointer to point to the void* + CALL C_F_POINTER(Cptr, Catomptr, [ncols]) + ! 
Catomptr(1) now points to the first element of the array + CALL C_F_POINTER(Catomptr(1), peratom_data%r64_mat, [nrows,ncols]) + CASE (-1) + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'per-atom property ' // name // 'not found in extract_setting') + CASE DEFAULT + WRITE(error_msg,'(A,I0,A)') 'return value ', datatype, & + ' from lammps_extract_atom_datatype not known [Fortran/extract_atom]' + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, error_msg) + END SELECT + END FUNCTION lmp_extract_atom + + ! equivalent function to lammps_extract_compute + ! the assignment operator is overloaded so as to bind the pointers to + ! lammps data based on the information available from LAMMPS + FUNCTION lmp_extract_compute(self, id, style, type) RESULT(compute_data) + CLASS(lammps), INTENT(IN), TARGET :: self + CHARACTER(LEN=*), INTENT(IN) :: id + INTEGER(c_int), INTENT(IN) :: style, type + TYPE(lammps_data) :: compute_data + + TYPE(c_ptr) :: Cid, Cptr, Ctemp + INTEGER :: nrows, ncols, length + INTEGER(c_int), POINTER :: temp + TYPE(c_ptr), DIMENSION(:), POINTER :: Ccomputeptr + + Cid = f2c_string(id) + Cptr = lammps_extract_compute(self%handle, Cid, style, type) + + IF (.NOT. C_ASSOCIATED(Cptr)) THEN + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'Pointer from LAMMPS is NULL [Fortran/extract_compute]') + END IF + + ! Remember that rows and columns in C are transposed in Fortran! 
+ compute_data%lammps_instance => self + SELECT CASE (type) + CASE (LMP_TYPE_SCALAR) + compute_data%datatype = DATA_DOUBLE + length = 1 + nrows = 1 + ncols = 1 + CALL C_F_POINTER(Cptr, compute_data%r64) + CASE (LMP_TYPE_VECTOR) + compute_data%datatype = DATA_DOUBLE_1D + IF (style == LMP_STYLE_ATOM) THEN + length = self%extract_setting('nmax') + ELSE + Ctemp = lammps_extract_compute(self%handle,Cid,style,LMP_SIZE_VECTOR) + CALL C_F_POINTER(Ctemp, temp) + length = temp + END IF + CALL C_F_POINTER(Cptr, compute_data%r64_vec, [length]) + CASE (LMP_TYPE_ARRAY) + compute_data%datatype = DATA_DOUBLE_2D + IF (style == LMP_STYLE_ATOM) THEN + ncols = self%extract_setting('nmax') + Ctemp = lammps_extract_compute(self%handle,Cid,style,LMP_SIZE_COLS) + CALL C_F_POINTER(Ctemp, temp) + nrows = temp + ELSE + Ctemp = lammps_extract_compute(self%handle,Cid,style,LMP_SIZE_ROWS) + CALL C_F_POINTER(Ctemp, temp) + ncols = temp + Ctemp = lammps_extract_compute(self%handle,Cid,style,LMP_SIZE_COLS) + CALL C_F_POINTER(Ctemp, temp) + nrows = temp + END IF + ! First, we dereference the void** pointer to point to a void* pointer + CALL C_F_POINTER(Cptr, Ccomputeptr, [ncols]) + ! 
Ccomputeptr(1) now points to the first element of the array + CALL C_F_POINTER(Ccomputeptr(1), compute_data%r64_mat, [nrows, ncols]) + CASE DEFAULT + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'unknown type value passed to extract_compute [Fortran API]') + END SELECT + CALL lammps_free(Cid) + END FUNCTION lmp_extract_compute + + FUNCTION lmp_extract_fix(self, id, style, type, nrow, ncol) RESULT(fix_data) + CLASS(lammps), INTENT(IN), TARGET :: self + CHARACTER(LEN=*), INTENT(IN) :: id + INTEGER(c_int), INTENT(IN) :: style, type + INTEGER(c_int), INTENT(IN), OPTIONAL :: nrow, ncol + TYPE(lammps_fix_data) :: fix_data + + TYPE(c_ptr) :: Cid, Cptr, Ctemp + TYPE(c_ptr), DIMENSION(:), POINTER :: Cfixptr + INTEGER(c_int) :: Cnrow, Cncol + REAL(c_double), POINTER :: Fptr + INTEGER :: nrows, ncols + INTEGER(c_int), POINTER :: temp + + ! We transpose ncol and nrow so the array appears to be transposed for + ! global data, as it would be if we could access the C++ array directly + Cnrow = -1 + Cncol = -1 + IF (PRESENT(nrow)) THEN + IF (.NOT. PRESENT(ncol)) THEN + ! Presumably the argument that's there is the vector length + Cnrow = nrow - 1_c_int + Cncol = -1_c_int + ELSE + ! Otherwise, the array is transposed, so...reverse the indices + Cncol = nrow - 1_c_int + END IF + END IF + + IF (PRESENT(ncol)) Cnrow = ncol - 1_c_int + + Cid = f2c_string(id) + Cptr = lammps_extract_fix(self%handle, Cid, style, type, Cnrow, Cncol) + IF (.NOT. 
C_ASSOCIATED(Cptr)) THEN + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'Pointer from LAMMPS is NULL for fix id "' // id & + // '" [Fortran/extract_fix]') + END IF + + fix_data%lammps_instance => self + SELECT CASE (style) + CASE (LMP_STYLE_GLOBAL) + fix_data%datatype = DATA_DOUBLE + CALL C_F_POINTER(Cptr, Fptr) + fix_data%r64 = Fptr + CALL lammps_free(Cptr) + CASE (LMP_STYLE_ATOM, LMP_STYLE_LOCAL) + SELECT CASE (type) + CASE (LMP_TYPE_SCALAR) + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'There is no such thing as a per-atom or local scalar& + & [Fortran/extract_fix]') + CASE (LMP_TYPE_VECTOR) + fix_data%datatype = DATA_DOUBLE_1D + IF (STYLE == LMP_STYLE_ATOM) THEN + nrows = self%extract_setting('nmax') + ELSE + Ctemp = lammps_extract_fix(self%handle, Cid, style, & + LMP_SIZE_VECTOR, 0_c_int,0_c_int) + CALL C_F_POINTER(Ctemp, temp) + nrows = temp + END IF + CALL C_F_POINTER(Cptr, fix_data%r64_vec, [nrows]) + CASE (LMP_TYPE_ARRAY) + fix_data%datatype = DATA_DOUBLE_2D + IF (STYLE == LMP_STYLE_ATOM) THEN + ! Fortran array is transposed relative to C + ncols = self%extract_setting('nmax') + Ctemp = lammps_extract_fix(self%handle, Cid, style, & + LMP_SIZE_COLS, 0_c_int,0_c_int) + CALL C_F_POINTER(Ctemp, temp) + nrows = temp + ELSE + ! Fortran array is transposed relative to C + Ctemp = lammps_extract_fix(self%handle, Cid, style, & + LMP_SIZE_COLS, 0_c_int,0_c_int) + CALL C_F_POINTER(Ctemp, temp) + nrows = temp + Ctemp = lammps_extract_fix(self%handle, Cid, style, & + LMP_SIZE_ROWS, 0_c_int,0_c_int) + CALL C_F_POINTER(Ctemp, temp) + ncols = temp + END IF + ! First, we dereference the void** to point to a void* pointer + CALL C_F_POINTER(Cptr, Cfixptr, [ncols]) + ! 
Cfixptr(1) now points to the first element of the array + CALL C_F_POINTER(Cfixptr(1), fix_data%r64_mat, [nrows, ncols]) + CASE DEFAULT + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'unknown type value passed to extract_fix [Fortran API]') + END SELECT + CASE DEFAULT + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'unknown style value passed to extract_fix [Fortran API]') + END SELECT + CALL lammps_free(Cid) + END FUNCTION lmp_extract_fix + + ! equivalent function to lammps_extract_variable + FUNCTION lmp_extract_variable(self, name, group) RESULT(variable_data) + CLASS(lammps), INTENT(IN), TARGET :: self + CHARACTER(LEN=*), INTENT(IN) :: name + CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: group + TYPE(lammps_variable_data) :: variable_data + + TYPE(c_ptr) :: Cptr, Cname, Cgroup, Cveclength + INTEGER(c_size_t) :: length, i + CHARACTER(KIND=c_char, LEN=1), DIMENSION(:), POINTER :: Cstring + INTEGER(c_int) :: datatype + REAL(c_double), POINTER :: double => NULL() + REAL(c_double), DIMENSION(:), POINTER :: double_vec => NULL() + INTEGER(c_int), POINTER :: Clength => NULL() + + Cname = f2c_string(name) + IF (PRESENT(group)) THEN + Cgroup = f2c_string(group) + ELSE + Cgroup = c_null_ptr + END IF + datatype = lammps_extract_variable_datatype(self%handle, Cname) + Cptr = lammps_extract_variable(self%handle, Cname, Cgroup) + CALL lammps_free(Cname) + CALL lammps_free(Cgroup) + + variable_data%lammps_instance => self + SELECT CASE (datatype) + CASE (LMP_VAR_EQUAL) + variable_data%datatype = DATA_DOUBLE + CALL C_F_POINTER(Cptr, double) + variable_data%r64 = double + CALL lammps_free(Cptr) + CASE (LMP_VAR_ATOM) + variable_data%datatype = DATA_DOUBLE_1D + length = lmp_extract_setting(self, 'nlocal') + CALL C_F_POINTER(Cptr, double_vec, [length]) + IF (ALLOCATED(variable_data%r64_vec)) DEALLOCATE(variable_data%r64_vec) + ALLOCATE(variable_data%r64_vec(length)) + variable_data%r64_vec = double_vec + CALL lammps_free(Cptr) + CASE (LMP_VAR_VECTOR) + 
variable_data%datatype = DATA_DOUBLE_1D + Cgroup = f2c_string('LMP_SIZE_VECTOR') ! must match library.cpp + Cname = f2c_string(name) + Cveclength = lammps_extract_variable(self%handle, Cname, Cgroup) + CALL C_F_POINTER(Cveclength, Clength) + length = Clength + CALL lammps_free(Cgroup) + CALL lammps_free(Cname) + CALL lammps_free(Cveclength) + CALL C_F_POINTER(Cptr, double_vec, [length]) + IF (ALLOCATED(variable_data%r64_vec)) & + DEALLOCATE(variable_data%r64_vec) + ALLOCATE(variable_data%r64_vec(length)) + variable_data%r64_vec = double_vec + ! DO NOT deallocate the C pointer + CASE (LMP_VAR_STRING) + variable_data%datatype = DATA_STRING + length = c_strlen(Cptr) + CALL C_F_POINTER(Cptr, Cstring, [length]) + ALLOCATE(CHARACTER(LEN=length) :: variable_data%str) + DO i = 1, length + variable_data%str(i:i) = Cstring(i) + END DO + ! DO NOT deallocate the C pointer + CASE (-1) + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'Variable "' // TRIM(name) // & + '" not found [Fortran/extract_variable]') + CASE DEFAULT + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'Unknown variable type returned from & + &lammps_extract_variable_datatype [Fortran/extract_variable]') + END SELECT + END FUNCTION lmp_extract_variable + + ! equivalent function to lammps_set_variable + SUBROUTINE lmp_set_variable(self, name, str) + CLASS(lammps), INTENT(IN) :: self + CHARACTER(LEN=*), INTENT(IN) :: name, str + INTEGER :: err + TYPE(c_ptr) :: Cstr, Cname + + Cstr = f2c_string(str) + Cname = f2c_string(name) + err = lammps_set_variable(self%handle, Cname, Cstr) + CALL lammps_free(Cname) + CALL lammps_free(Cstr) + IF (err /= 0) THEN + CALL lmp_error(self, LMP_ERROR_WARNING + LMP_ERROR_WORLD, & + 'WARNING: unable to set string variable "' // name & + // '" [Fortran/set_variable]') + END IF + END SUBROUTINE lmp_set_variable + + ! 
equivalent function to lammps_gather_atoms (for integers) + SUBROUTINE lmp_gather_atoms_int(self, name, count, data) + CLASS(lammps), INTENT(IN) :: self + CHARACTER(LEN=*), INTENT(IN) :: name + INTEGER(c_int), INTENT(IN) :: count + INTEGER(c_int), DIMENSION(:), ALLOCATABLE, TARGET, INTENT(OUT) :: data + TYPE(c_ptr) :: Cdata, Cname + INTEGER(c_int) :: natoms + INTEGER(c_int), PARAMETER :: Ctype = 0_c_int + REAL(c_double) :: dnatoms + CHARACTER(LEN=100) :: error_msg + + IF (count /= 1 .AND. count /= 3) THEN + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, 'gather_atoms& + & requires "count" to be 1 or 3 [Fortran/gather_atoms]') + END IF + + dnatoms = lmp_get_natoms(self) + IF (dnatoms > HUGE(1_c_int)) THEN + WRITE(error_msg,'(A,1X,I0,1X,A)') & + 'Cannot use library function gather_atoms with more than', & + HUGE(0_c_int), 'atoms [Fortran/gather_atoms]' + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, error_msg) + END IF + natoms = NINT(dnatoms, c_int) + + Cname = f2c_string(name) + IF (ALLOCATED(data)) DEALLOCATE(data) + ALLOCATE(data(natoms*count)) + Cdata = C_LOC(data(1)) + CALL lammps_gather_atoms(self%handle, Cname, Ctype, count, Cdata) + CALL lammps_free(Cname) + END SUBROUTINE lmp_gather_atoms_int + + ! equivalent function to lammps_gather_atoms (for doubles) + SUBROUTINE lmp_gather_atoms_double(self, name, count, data) + CLASS(lammps), INTENT(IN) :: self + CHARACTER(LEN=*), INTENT(IN) :: name + INTEGER(c_int), INTENT(IN) :: count + REAL(c_double), DIMENSION(:), ALLOCATABLE, TARGET, INTENT(OUT) :: data + TYPE(c_ptr) :: Cdata, Cname + INTEGER(c_int) :: natoms + INTEGER(c_int), PARAMETER :: Ctype = 1_c_int + REAL(c_double) :: dnatoms + CHARACTER(LEN=100) :: error_msg + + IF (count /= 1 .AND. 
count /= 3) THEN + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, 'gather_atoms& + & requires "count" to be 1 or 3 [Fortran/gather_atoms]') + END IF + + dnatoms = lmp_get_natoms(self) + IF (dnatoms > HUGE(1_c_int)) THEN + WRITE(error_msg,'(A,1X,I0,1X,A)') & + 'Cannot use library function gather_atoms with more than', & + HUGE(0_c_int), 'atoms [Fortran/gather_atoms]' + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, error_msg) + END IF + natoms = NINT(dnatoms, c_int) + + Cname = f2c_string(name) + IF (ALLOCATED(data)) DEALLOCATE(data) + ALLOCATE(data(natoms*count)) + Cdata = C_LOC(data(1)) + CALL lammps_gather_atoms(self%handle, Cname, Ctype, count, Cdata) + CALL lammps_free(Cname) + END SUBROUTINE lmp_gather_atoms_double + + ! equivalent function to lammps_gather_atoms_concat (for integers) + SUBROUTINE lmp_gather_atoms_concat_int(self, name, count, data) + CLASS(lammps), INTENT(IN) :: self + CHARACTER(LEN=*), INTENT(IN) :: name + INTEGER(c_int), INTENT(IN) :: count + INTEGER(c_int), DIMENSION(:), ALLOCATABLE, TARGET, INTENT(OUT) :: data + TYPE(c_ptr) :: Cdata, Cname + INTEGER(c_int) :: natoms + INTEGER(c_int), PARAMETER :: Ctype = 0_c_int + REAL(c_double) :: dnatoms + CHARACTER(LEN=100) :: error_msg + + IF (count /= 1 .AND. 
count /= 3) THEN + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'gather_atoms_concat requires "count" to be 1 or 3 & + &[Fortran/gather_atoms_concat]') + END IF + + dnatoms = lmp_get_natoms(self) + IF (dnatoms > HUGE(1_c_int)) THEN + WRITE(error_msg,'(A,1X,I0,1X,A)') & + 'Cannot use library function gather_atoms_concat with more than', & + HUGE(0_c_int), 'atoms [Fortran/gather_atoms_concat]' + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, error_msg) + END IF + natoms = NINT(dnatoms, c_int) + + Cname = f2c_string(name) + IF (ALLOCATED(data)) DEALLOCATE(data) + ALLOCATE(data(natoms*count)) + Cdata = C_LOC(data(1)) + CALL lammps_gather_atoms_concat(self%handle, Cname, Ctype, count, Cdata) + CALL lammps_free(Cname) + END SUBROUTINE lmp_gather_atoms_concat_int + + ! equivalent function to lammps_gather_atoms_concat (for doubles) + SUBROUTINE lmp_gather_atoms_concat_double(self, name, count, data) + CLASS(lammps), INTENT(IN) :: self + CHARACTER(LEN=*), INTENT(IN) :: name + INTEGER(c_int), INTENT(IN) :: count + REAL(c_double), DIMENSION(:), ALLOCATABLE, TARGET, INTENT(OUT) :: data + TYPE(c_ptr) :: Cdata, Cname + INTEGER(c_int) :: natoms + INTEGER(c_int), PARAMETER :: Ctype = 1_c_int + REAL(c_double) :: dnatoms + CHARACTER(LEN=100) :: error_msg + + IF (count /= 1 .AND. 
count /= 3) THEN + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'gather_atoms_concat requires "count" to be 1 or 3 & + &[Fortran/gather_atoms_concat]') + END IF + + dnatoms = lmp_get_natoms(self) + IF (dnatoms > HUGE(1_c_int)) THEN + WRITE(error_msg,'(A,1X,I0,1X,A)') & + 'Cannot use library function gather_atoms_concat with more than', & + HUGE(0_c_int), 'atoms [Fortran/gather_atoms_concat]' + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, error_msg) + END IF + natoms = NINT(dnatoms, c_int) + + Cname = f2c_string(name) + IF (ALLOCATED(data)) DEALLOCATE(data) + ALLOCATE(data(natoms*count)) + Cdata = C_LOC(data(1)) + CALL lammps_gather_atoms_concat(self%handle, Cname, Ctype, count, Cdata) + CALL lammps_free(Cname) + END SUBROUTINE lmp_gather_atoms_concat_double + + ! equivalent function to lammps_gather_atoms_subset (for integers) + SUBROUTINE lmp_gather_atoms_subset_int(self, name, count, ids, data) + CLASS(lammps), INTENT(IN) :: self + CHARACTER(LEN=*), INTENT(IN) :: name + INTEGER(c_int), INTENT(IN) :: count + INTEGER(c_int), DIMENSION(:), TARGET, INTENT(IN) :: ids + INTEGER(c_int), DIMENSION(:), ALLOCATABLE, TARGET, INTENT(OUT) :: data + INTEGER(c_int) :: ndata + TYPE(c_ptr) :: Cdata, Cname, Cids + INTEGER(c_int), PARAMETER :: Ctype = 0_c_int + + IF (count /= 1 .AND. count /= 3) THEN + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'gather_atoms_subset requires "count" to be 1 or 3 & + &[Fortran/gather_atoms]') + END IF + + ndata = SIZE(ids, KIND=c_int) + + Cname = f2c_string(name) + IF (ALLOCATED(data)) DEALLOCATE(data) + ALLOCATE(data(ndata*count)) + data = -1_c_int + Cdata = C_LOC(data(1)) + Cids = C_LOC(ids(1)) + CALL lammps_gather_atoms_subset(self%handle, Cname, Ctype, count, & + ndata, Cids, Cdata) + CALL lammps_free(Cname) + END SUBROUTINE lmp_gather_atoms_subset_int + + ! 
equivalent function to lammps_gather_atoms_subset (for doubles) + SUBROUTINE lmp_gather_atoms_subset_double(self, name, count, ids, data) + CLASS(lammps), INTENT(IN) :: self + CHARACTER(LEN=*), INTENT(IN) :: name + INTEGER(c_int), INTENT(IN) :: count + INTEGER(c_int), DIMENSION(:), TARGET, INTENT(IN) :: ids + REAL(c_double), DIMENSION(:), ALLOCATABLE, TARGET, INTENT(OUT) :: data + INTEGER(c_int) :: ndata + TYPE(c_ptr) :: Cdata, Cname, Cids + INTEGER(c_int), PARAMETER :: Ctype = 1_c_int + + IF (count /= 1 .AND. count /= 3) THEN + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'gather_atoms_subset requires "count" to be 1 or 3 & + &[Fortran/gather_atoms]') + END IF + + ndata = SIZE(ids, KIND=c_int) + + Cname = f2c_string(name) + IF (ALLOCATED(data)) DEALLOCATE(data) + ALLOCATE(data(ndata*count)) + Cdata = C_LOC(data(1)) + Cids = C_LOC(ids(1)) + CALL lammps_gather_atoms_subset(self%handle, Cname, Ctype, count, & + ndata, Cids, Cdata) + CALL lammps_free(Cname) + END SUBROUTINE lmp_gather_atoms_subset_double + + ! equivalent function to lammps_scatter_atoms (for integers) + SUBROUTINE lmp_scatter_atoms_int(self, name, data) + CLASS(lammps), INTENT(IN) :: self + CHARACTER(LEN=*), INTENT(IN) :: name + INTEGER(c_int), DIMENSION(:), TARGET :: data + INTEGER(c_int) :: natoms, Ccount + INTEGER(c_int), PARAMETER :: Ctype = 0_c_int + TYPE(c_ptr) :: Cname, Cdata + REAL(c_double) :: dnatoms + CHARACTER(LEN=100) :: error_msg + + dnatoms = lmp_get_natoms(self) + IF (dnatoms > HUGE(1_c_int)) THEN + WRITE(error_msg,'(A,1X,I0,1X,A)') & + 'Cannot use library function scatter_atoms with more than', & + HUGE(0_c_int), 'atoms [Fortran/scatter_atoms]' + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, error_msg) + END IF + natoms = NINT(dnatoms, c_int) + + Cname = f2c_string(name) + Cdata = C_LOC(data(1)) + Ccount = SIZE(data) / natoms + + IF (Ccount /= 1 .AND. 
Ccount /= 3) THEN + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'lammps_scatter_atoms requires either 1 or 3 data per atom') + END IF + CALL lammps_scatter_atoms(self%handle, Cname, Ctype, Ccount, Cdata) + CALL lammps_free(Cname) + END SUBROUTINE lmp_scatter_atoms_int + + ! equivalent function to lammps_scatter_atoms (for doubles) + SUBROUTINE lmp_scatter_atoms_double(self, name, data) + CLASS(lammps), INTENT(IN) :: self + CHARACTER(LEN=*), INTENT(IN) :: name + REAL(c_double), DIMENSION(:), TARGET :: data + INTEGER(c_int) :: natoms, Ccount + INTEGER(c_int), PARAMETER :: Ctype = 1_c_int + TYPE(c_ptr) :: Cname, Cdata + REAL(c_double) :: dnatoms + CHARACTER(LEN=100) :: error_msg + + dnatoms = lmp_get_natoms(self) + IF (dnatoms > HUGE(1_c_int)) THEN + WRITE(error_msg,'(A,1X,I0,1X,A)') & + 'Cannot use library function scatter_atoms with more than', & + HUGE(0_c_int), 'atoms [Fortran/scatter_atoms]' + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, error_msg) + END IF + natoms = NINT(dnatoms, c_int) + + Cname = f2c_string(name) + Cdata = C_LOC(data(1)) + Ccount = SIZE(data) / natoms + + IF (Ccount /= 1 .AND. Ccount /= 3) THEN + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'scatter_atoms requires either 1 or 3 data per atom & + &[Fortran/scatter_atoms]') + END IF + CALL lammps_scatter_atoms(self%handle, Cname, Ctype, Ccount, Cdata) + CALL lammps_free(Cname) + END SUBROUTINE lmp_scatter_atoms_double + + SUBROUTINE lmp_scatter_atoms_subset_int(self, name, ids, data) + CLASS(lammps), INTENT(IN) :: self + CHARACTER(LEN=*), INTENT(IN) :: name + INTEGER(c_int), DIMENSION(:), TARGET :: ids + INTEGER(c_int), DIMENSION(:), TARGET :: data + INTEGER(c_int), PARAMETER :: Ctype = 0_c_int + INTEGER(c_int) :: Cndata, Ccount + TYPE(c_ptr) :: Cdata, Cname, Cids + + Cndata = SIZE(ids, KIND=c_int) + Ccount = SIZE(data, KIND=c_int) / Cndata + IF (Ccount /= 1 .AND. 
Ccount /= 3) THEN + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'scatter_atoms_subset requires either 1 or 3 data per atom') + END IF + + Cname = f2c_string(name) + Cdata = C_LOC(data(1)) + Cids = C_LOC(ids(1)) + CALL lammps_scatter_atoms_subset(self%handle, Cname, Ctype, Ccount, & + Cndata, Cids, Cdata) + CALL lammps_free(Cname) + END SUBROUTINE lmp_scatter_atoms_subset_int + + SUBROUTINE lmp_scatter_atoms_subset_double(self, name, ids, data) + CLASS(lammps), INTENT(IN) :: self + CHARACTER(LEN=*), INTENT(IN) :: name + INTEGER(c_int), DIMENSION(:), TARGET :: ids + REAL(c_double), DIMENSION(:), TARGET :: data + INTEGER(c_int), PARAMETER :: Ctype = 1_c_int + INTEGER(c_int) :: Cndata, Ccount + TYPE(c_ptr) :: Cdata, Cname, Cids + + Cndata = SIZE(ids, KIND=c_int) + Ccount = SIZE(data, KIND=c_int) / Cndata + IF (Ccount /= 1 .AND. Ccount /= 3) THEN + CALL lmp_error(self, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'scatter_atoms_subset requires either 1 or 3 data per atom') + END IF + + Cname = f2c_string(name) + Cdata = C_LOC(data(1)) + Cids = C_LOC(ids(1)) + CALL lammps_scatter_atoms_subset(self%handle, Cname, Ctype, Ccount, & + Cndata, Cids, Cdata) + CALL lammps_free(Cname) + END SUBROUTINE lmp_scatter_atoms_subset_double + + ! equivalent function to lammps_version INTEGER FUNCTION lmp_version(self) - CLASS(lammps) :: self + CLASS(lammps), INTENT(IN) :: self lmp_version = lammps_version(self%handle) END FUNCTION lmp_version + ! equivalent function to lammps_get_os_info + SUBROUTINE lmp_get_os_info(buffer) + CHARACTER(LEN=*) :: buffer + INTEGER(c_int) :: buf_size + CHARACTER(LEN=1,KIND=c_char), DIMENSION(LEN(buffer)), TARGET :: Cbuffer + TYPE(c_ptr) :: ptr + INTEGER :: i + + buffer = ' ' + ptr = C_LOC(Cbuffer(1)) + buf_size = LEN(buffer) + CALL lammps_get_os_info(ptr, buf_size) + DO i=1,buf_size + IF (Cbuffer(i) == c_null_char) EXIT + buffer(i:i) = Cbuffer(i) + END DO + END SUBROUTINE lmp_get_os_info + + ! 
equivalent function to lammps_config_has_mpi_support + LOGICAL FUNCTION lmp_config_has_mpi_support() + INTEGER(c_int) :: has_mpi_support + + has_mpi_support = lammps_config_has_mpi_support() + lmp_config_has_mpi_support = (has_mpi_support /= 0_c_int) + END FUNCTION lmp_config_has_mpi_support + + ! equivalent function to lammps_config_has_gzip_support + LOGICAL FUNCTION lmp_config_has_gzip_support() + INTEGER(c_int) :: has_gzip_support + + has_gzip_support = lammps_config_has_gzip_support() + lmp_config_has_gzip_support = (has_gzip_support /= 0_c_int) + END FUNCTION lmp_config_has_gzip_support + + ! equivalent function to lammps_config_has_png_support + LOGICAL FUNCTION lmp_config_has_png_support() + INTEGER(c_int) :: has_png_support + + has_png_support = lammps_config_has_png_support() + lmp_config_has_png_support = (has_png_support /= 0_c_int) + END FUNCTION lmp_config_has_png_support + + ! equivalent function to lammps_config_has_jpeg_support + LOGICAL FUNCTION lmp_config_has_jpeg_support() + INTEGER(c_int) :: has_jpeg_support + + has_jpeg_support = lammps_config_has_jpeg_support() + lmp_config_has_jpeg_support = (has_jpeg_support /= 0_c_int) + END FUNCTION lmp_config_has_jpeg_support + + ! equivalent function to lammps_config_has_ffmpeg_support + LOGICAL FUNCTION lmp_config_has_ffmpeg_support() + INTEGER(c_int) :: has_ffmpeg_support + + has_ffmpeg_support = lammps_config_has_ffmpeg_support() + lmp_config_has_ffmpeg_support = (has_ffmpeg_support /= 0_c_int) + END FUNCTION lmp_config_has_ffmpeg_support + + ! equivalent function to lammps_config_has_exceptions + LOGICAL FUNCTION lmp_config_has_exceptions() + INTEGER(c_int) :: has_exceptions + + has_exceptions = lammps_config_has_exceptions() + lmp_config_has_exceptions = (has_exceptions /= 0_c_int) + END FUNCTION lmp_config_has_exceptions + + ! 
equivalent function to lammps_config_has_package + LOGICAL FUNCTION lmp_config_has_package(name) + CHARACTER(LEN=*), INTENT(IN) :: name + INTEGER(c_int) :: has_package + TYPE(c_ptr) :: Cname + + Cname = f2c_string(name) + has_package = lammps_config_has_package(Cname) + lmp_config_has_package = (has_package /= 0_c_int) + CALL lammps_free(Cname) + END FUNCTION lmp_config_has_package + + ! equivalent subroutine to lammps_config_package_name + SUBROUTINE lmp_config_package_name(idx, buffer) + INTEGER, INTENT(IN) :: idx + CHARACTER(LEN=*), INTENT(OUT) :: buffer + INTEGER(c_int) :: Cidx, Csuccess + TYPE(c_ptr) :: Cptr + CHARACTER(LEN=1,KIND=c_char), TARGET :: Cbuffer(LEN(buffer)+1) + INTEGER(c_size_t) :: i, strlen + + Cidx = idx - 1 + Cptr = C_LOC(Cbuffer(1)) + Csuccess = lammps_config_package_name(Cidx, Cptr, LEN(buffer)+1) + buffer = ' ' + IF (Csuccess /= 0_c_int) THEN + strlen = c_strlen(Cptr) + DO i = 1, strlen + buffer(i:i) = Cbuffer(i) + END DO + END IF + END SUBROUTINE lmp_config_package_name + + ! equivalent function to Python routine .installed_packages() + SUBROUTINE lmp_installed_packages(package, length) + CHARACTER(LEN=:), DIMENSION(:), ALLOCATABLE, INTENT(OUT) :: package + INTEGER, INTENT(IN), OPTIONAL :: length + INTEGER, PARAMETER :: MAX_BUFFER_LENGTH = 31 + INTEGER :: i, npackage, buf_length + + IF (PRESENT(length)) THEN + buf_length = length + ELSE + buf_length = MAX_BUFFER_LENGTH + END IF + + IF (ALLOCATED(package)) DEALLOCATE(package) + npackage = lammps_config_package_count() + ALLOCATE(CHARACTER(LEN=MAX_BUFFER_LENGTH) :: package(npackage)) + DO i=1, npackage + CALL lmp_config_package_name(i, package(i)) + END DO + END SUBROUTINE lmp_installed_packages + + ! equivalent function to lammps_flush_buffers + SUBROUTINE lmp_flush_buffers(self) + CLASS(lammps), INTENT(IN) :: self + + CALL lammps_flush_buffers(self%handle) + END SUBROUTINE lmp_flush_buffers + ! 
equivalent function to lammps_is_running LOGICAL FUNCTION lmp_is_running(self) - CLASS(lammps) :: self + CLASS(lammps), INTENT(IN) :: self - lmp_is_running = ( lammps_is_running(self%handle) /= 0_C_int ) + lmp_is_running = (lammps_is_running(self%handle) /= 0_c_int) END FUNCTION lmp_is_running + ! equivalent function to lammps_force_timeout + SUBROUTINE lmp_force_timeout(self) + CLASS(lammps), INTENT(IN) :: self + + CALL lammps_force_timeout(self%handle) + END SUBROUTINE + + ! equivalent function to lammps_has_error + LOGICAL FUNCTION lmp_has_error(self) + CLASS(lammps), INTENT(IN) :: self + INTEGER(c_int) :: has_error + + has_error = lammps_has_error(self%handle) + lmp_has_error = (has_error /= 0_c_int) + END FUNCTION lmp_has_error + + ! equivalent function to lammps_get_last_error_message + SUBROUTINE lmp_get_last_error_message(self, buffer, status) + CLASS(lammps), INTENT(IN) :: self + CHARACTER(LEN=*), INTENT(OUT) :: buffer + INTEGER, INTENT(OUT), OPTIONAL :: status + INTEGER(c_int) :: buflen, Cstatus + INTEGER(c_size_t) :: i, length + TYPE(c_ptr) :: Cptr + CHARACTER(LEN=1, KIND=c_char), POINTER :: c_string(:) + + buffer = ' ' + IF (lmp_has_error(self)) THEN + buflen = LEN(buffer) + length = buflen + Cptr = lammps_malloc(length) + Cstatus = lammps_get_last_error_message(self%handle, Cptr, buflen) + CALL C_F_POINTER(Cptr, c_string, [1]) + DO i=1, length + buffer(i:i) = c_string(i) + IF (c_string(i) == c_null_char) EXIT + END DO + IF (PRESENT(status)) THEN + status = Cstatus + END IF + CALL lammps_free(Cptr) + ELSE + buffer = ' ' + IF (PRESENT(status)) THEN + status = 0 + END IF + END IF + END SUBROUTINE lmp_get_last_error_message + ! ---------------------------------------------------------------------- ! functions to assign user-space pointers to LAMMPS data ! 
---------------------------------------------------------------------- - SUBROUTINE assign_int_to_lammps_data (lhs, rhs) + SUBROUTINE assign_int_to_lammps_data(lhs, rhs) INTEGER(c_int), INTENT(OUT), POINTER :: lhs CLASS(lammps_data), INTENT(IN) :: rhs - IF ( rhs%datatype == DATA_INT ) THEN + IF (rhs%datatype == DATA_INT) THEN lhs => rhs%i32 ELSE - CALL assignment_error(rhs%datatype, 'scalar int') + CALL assignment_error(rhs, 'scalar int') END IF END SUBROUTINE assign_int_to_lammps_data - SUBROUTINE assign_int64_to_lammps_data (lhs, rhs) + SUBROUTINE assign_int64_to_lammps_data(lhs, rhs) INTEGER(c_int64_t), INTENT(OUT), POINTER :: lhs CLASS(lammps_data), INTENT(IN) :: rhs - IF ( rhs%datatype == DATA_INT64 ) THEN + IF (rhs%datatype == DATA_INT64) THEN lhs => rhs%i64 ELSE - CALL assignment_error(rhs%datatype, 'scalar long int') + CALL assignment_error(rhs, 'scalar long int') END IF END SUBROUTINE assign_int64_to_lammps_data - SUBROUTINE assign_intvec_to_lammps_data (lhs, rhs) + SUBROUTINE assign_intvec_to_lammps_data(lhs, rhs) INTEGER(c_int), DIMENSION(:), INTENT(OUT), POINTER :: lhs CLASS(lammps_data), INTENT(IN) :: rhs - IF ( rhs%datatype == DATA_INT_1D ) THEN + IF (rhs%datatype == DATA_INT_1D) THEN lhs => rhs%i32_vec ELSE - CALL assignment_error(rhs%datatype, 'vector of ints') + CALL assignment_error(rhs, 'vector of ints') END IF END SUBROUTINE assign_intvec_to_lammps_data - SUBROUTINE assign_double_to_lammps_data (lhs, rhs) + SUBROUTINE assign_int64vec_to_lammps_data(lhs, rhs) + INTEGER(c_int64_t), DIMENSION(:), INTENT(OUT), POINTER :: lhs + CLASS(lammps_data), INTENT(IN) :: rhs + + IF (rhs%datatype == DATA_INT64_1D) THEN + lhs => rhs%i64_vec + ELSE + CALL assignment_error(rhs, 'vector of long ints') + END IF + END SUBROUTINE assign_int64vec_to_lammps_data + + SUBROUTINE assign_double_to_lammps_data(lhs, rhs) REAL(c_double), INTENT(OUT), POINTER :: lhs CLASS(lammps_data), INTENT(IN) :: rhs - IF ( rhs%datatype == DATA_DOUBLE ) THEN + IF (rhs%datatype == 
DATA_DOUBLE) THEN lhs => rhs%r64 ELSE - CALL assignment_error(rhs%datatype, 'scalar double') + CALL assignment_error(rhs, 'scalar double') END IF END SUBROUTINE assign_double_to_lammps_data - SUBROUTINE assign_doublevec_to_lammps_data (lhs, rhs) + SUBROUTINE assign_doublevec_to_lammps_data(lhs, rhs) REAL(c_double), DIMENSION(:), INTENT(OUT), POINTER :: lhs CLASS(lammps_data), INTENT(IN) :: rhs - IF ( rhs%datatype == DATA_DOUBLE_1D ) THEN + IF (rhs%datatype == DATA_DOUBLE_1D) THEN lhs => rhs%r64_vec ELSE - CALL assignment_error(rhs%datatype, 'vector of doubles') + CALL assignment_error(rhs, 'vector of doubles') END IF END SUBROUTINE assign_doublevec_to_lammps_data - SUBROUTINE assign_string_to_lammps_data (lhs, rhs) + SUBROUTINE assign_doublemat_to_lammps_data(lhs, rhs) + REAL(c_double), DIMENSION(:,:), INTENT(OUT), POINTER :: lhs + CLASS(lammps_data), INTENT(IN) :: rhs + + IF (rhs%datatype == DATA_DOUBLE_2D) THEN + lhs => rhs%r64_mat + ELSE + CALL assignment_error(rhs, 'matrix of doubles') + END IF + END SUBROUTINE assign_doublemat_to_lammps_data + + SUBROUTINE assign_string_to_lammps_data(lhs, rhs) CHARACTER(LEN=*), INTENT(OUT) :: lhs CLASS(lammps_data), INTENT(IN) :: rhs - IF ( rhs%datatype == DATA_STRING ) THEN + IF (rhs%datatype == DATA_STRING) THEN lhs = rhs%str + IF (LEN_TRIM(rhs%str) > LEN(lhs)) THEN + CALL lmp_error(rhs%lammps_instance, LMP_ERROR_WARNING, & + 'String provided by user required truncation [Fortran API]') + END IF ELSE - CALL assignment_error(rhs%datatype, 'string') + CALL assignment_error(rhs, 'string') END IF END SUBROUTINE assign_string_to_lammps_data - SUBROUTINE assignment_error (type1, type2) - INTEGER (c_int) :: type1 - CHARACTER (LEN=*) :: type2 - INTEGER, PARAMETER :: ERROR_CODE = 1 - CHARACTER (LEN=:), ALLOCATABLE :: str1 + ! ---------------------------------------------------------------------- + ! functions to assign user-space pointers to LAMMPS *fix* data + ! 
---------------------------------------------------------------------- + SUBROUTINE assign_double_to_lammps_fix_data(lhs, rhs) + REAL(c_double), INTENT(OUT) :: lhs + CLASS(lammps_fix_data), INTENT(IN) :: rhs - SELECT CASE (type1) - CASE (DATA_INT) + IF (rhs%datatype == DATA_DOUBLE) THEN + lhs = rhs%r64 + ELSE + CALL assignment_error(rhs, 'scalar double') + END IF + END SUBROUTINE assign_double_to_lammps_fix_data + + SUBROUTINE assign_doublevec_to_lammps_fix_data(lhs, rhs) + REAL(c_double), DIMENSION(:), INTENT(OUT), POINTER :: lhs + CLASS(lammps_fix_data), INTENT(IN) :: rhs + + IF (rhs%datatype == DATA_DOUBLE_1D) THEN + lhs => rhs%r64_vec + ELSE + CALL assignment_error(rhs, 'vector of doubles') + END IF + END SUBROUTINE assign_doublevec_to_lammps_fix_data + + SUBROUTINE assign_doublemat_to_lammps_fix_data(lhs, rhs) + REAL(c_double), DIMENSION(:,:), INTENT(OUT), POINTER :: lhs + CLASS(lammps_fix_data), INTENT(IN) :: rhs + + IF (rhs%datatype == DATA_DOUBLE_2D) THEN + lhs => rhs%r64_mat + ELSE + CALL assignment_error(rhs, 'matrix of doubles') + END IF + END SUBROUTINE assign_doublemat_to_lammps_fix_data + + ! ---------------------------------------------------------------------- + ! functions to assign user-space pointers to LAMMPS *variable* data + ! 
---------------------------------------------------------------------- + SUBROUTINE assign_double_to_lammps_variable_data(lhs, rhs) + REAL(c_double), INTENT(OUT) :: lhs + CLASS(lammps_variable_data), INTENT(IN) :: rhs + + IF (rhs%datatype == DATA_DOUBLE) THEN + lhs = rhs%r64 + ELSE + CALL assignment_error(rhs, 'scalar double') + END IF + END SUBROUTINE assign_double_to_lammps_variable_data + + SUBROUTINE assign_doublevec_to_lammps_variable_data(lhs, rhs) + REAL(c_double), DIMENSION(:), ALLOCATABLE, INTENT(OUT) :: lhs + CLASS(lammps_variable_data), INTENT(IN) :: rhs + + IF (rhs%datatype == DATA_DOUBLE_1D) THEN + IF (ALLOCATED(lhs)) DEALLOCATE(lhs) + ALLOCATE(lhs(SIZE(rhs%r64_vec))) + lhs = rhs%r64_vec + ELSE + CALL assignment_error(rhs, 'vector of doubles') + END IF + END SUBROUTINE assign_doublevec_to_lammps_variable_data + + SUBROUTINE assign_string_to_lammps_variable_data(lhs, rhs) + CHARACTER(LEN=*), INTENT(OUT) :: lhs + CLASS(lammps_variable_data), INTENT(IN) :: rhs + + IF (rhs%datatype == DATA_STRING) THEN + lhs = rhs%str + IF (LEN_TRIM(rhs%str) > LEN(lhs)) THEN + CALL lmp_error(rhs%lammps_instance, LMP_ERROR_WARNING, & + 'String provided by user required truncation [Fortran API]') + END IF + ELSE + CALL assignment_error(rhs, 'string') + END IF + END SUBROUTINE assign_string_to_lammps_variable_data + + ! ---------------------------------------------------------------------- + ! Generic function to catch all errors in assignments of LAMMPS data to + ! user-space variables/pointers + ! 
---------------------------------------------------------------------- + SUBROUTINE assignment_error(type1, str2) + CLASS(lammps_data_baseclass), INTENT(IN) :: type1 + CHARACTER(LEN=*), INTENT(IN) :: str2 + CHARACTER(LEN=:), ALLOCATABLE :: str1 + + SELECT CASE(type1%datatype) + CASE(DATA_INT) str1 = 'scalar int' - CASE (DATA_INT_1D) + CASE(DATA_INT_1D) str1 = 'vector of ints' - CASE (DATA_INT_2D) + CASE(DATA_INT_2D) str1 = 'matrix of ints' - CASE (DATA_INT64) + CASE(DATA_INT64) str1 = 'scalar long int' - CASE (DATA_INT64_1D) + CASE(DATA_INT64_1D) str1 = 'vector of long ints' - CASE (DATA_INT64_2D) + CASE(DATA_INT64_2D) str1 = 'matrix of long ints' - CASE (DATA_DOUBLE) + CASE(DATA_DOUBLE) str1 = 'scalar double' - CASE (DATA_DOUBLE_1D) + CASE(DATA_DOUBLE_1D) str1 = 'vector of doubles' - CASE (DATA_DOUBLE_2D) + CASE(DATA_DOUBLE_2D) str1 = 'matrix of doubles' + CASE(DATA_STRING) + str1 = 'string' CASE DEFAULT str1 = 'that type' END SELECT - WRITE (ERROR_UNIT,'(A)') 'Cannot associate ' // str1 // ' with ' // type2 - STOP ERROR_CODE + CALL lmp_error(type1%lammps_instance, LMP_ERROR_ALL + LMP_ERROR_WORLD, & + 'cannot associate ' // str1 // ' with ' // str2 // ' [Fortran API]') END SUBROUTINE assignment_error ! 
---------------------------------------------------------------------- diff --git a/lib/gpu/Makefile.oneapi b/lib/gpu/Makefile.oneapi index 015ab47057..d0ed78d0c4 100644 --- a/lib/gpu/Makefile.oneapi +++ b/lib/gpu/Makefile.oneapi @@ -1,5 +1,5 @@ -# /* ---------------------------------------------------------------------- -# Generic Linux Makefile for OpenCL +# /* ---------------------------------------------------------------------- +# Linux Makefile for Intel oneAPI - Mixed precision # ------------------------------------------------------------------------- */ # which file will be copied to Makefile.lammps @@ -11,11 +11,14 @@ EXTRAMAKE = Makefile.lammps.opencl LMP_INC = -DLAMMPS_SMALLBIG -OCL_INC = -OCL_CPP = mpiicpc -std=c++11 -xHost -O2 -qopenmp -qopenmp-simd -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC) -OCL_LINK = -lOpenCL +OCL_INC = -I$(ONEAPI_ROOT)/compiler/latest/linux/include/sycl/ +CPP_OPT = -xHost -O2 -qopenmp -qopenmp-simd -fp-model fast=2 -no-prec-div \ + -qoverride-limits +OCL_CPP = mpiicpc -std=c++11 -diag-disable=10441 -DMPICH_IGNORE_CXX_SEEK \ + $(LMP_INC) $(OCL_INC) $(CPP_OPT) +OCL_LINK = -L$(ONEAPI_ROOT)/compiler/latest/linux/lib -lOpenCL OCL_PREC = -D_SINGLE_DOUBLE -OCL_TUNE = -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT -fp-model fast=2 -no-prec-div +OCL_TUNE = -DMPI_GERYON -DCUDA_PROXY -DGERYON_NUMA_FISSION -DUCL_NO_EXIT BIN_DIR = ./ OBJ_DIR = ./ diff --git a/lib/gpu/README b/lib/gpu/README index 100179feca..79397a7066 100644 --- a/lib/gpu/README +++ b/lib/gpu/README @@ -264,6 +264,20 @@ GERYON_KERNEL_DUMP Dump all compiled OpenCL programs with compiler flags and build logs GPU_CAST Casting performed on GPU, untested recently THREE_CONCURRENT Concurrent 3-body calcs in separate queues, untested +LAL_SERIALIZE_INIT Force serialization of initialization and compilation + for multiple MPI tasks sharing the same accelerator. + Some accelerator API implementations have had issues + with temporary file conflicts in the past. 
+GERYON_FORCE_SHARED_MAIN_MEM_ON Should only be used for builds where the + accelerator is guaranteed to share physical + main memory with the host (e.g. integrated + GPU or CPU device). Default behavior is to + auto-detect. Impacts OpenCL only. +GERYON_FORCE_SHARED_MAIN_MEM_OFF Should only be used for builds where the + accelerator is guaranteed to have discrete + physical main memory vs the host (discrete + GPU card). Default behavior is to + auto-detect. Impacts OpenCL only. ------------------------------------------------------------------------------ diff --git a/lib/gpu/geryon/ocl_device.h b/lib/gpu/geryon/ocl_device.h index 506ee1fe91..095c1395f2 100644 --- a/lib/gpu/geryon/ocl_device.h +++ b/lib/gpu/geryon/ocl_device.h @@ -126,10 +126,13 @@ class UCL_Device { /// Return the number of devices that support OpenCL inline int num_devices() { return _num_devices; } - /// Specify whether profiling (device timers) will be used for the device (yes=true) + /// Specify whether profiling (device timers) will be used (yes=true) /** No-op for CUDA and HIP **/ - inline void configure_profiling(const bool profiling_on) - { _cq_profiling = profiling_on; } + inline void configure_profiling(const bool profiling_on) { + #ifndef GERYON_NO_OCL_MARKERS + _cq_profiling = profiling_on; + #endif + } /// Set the OpenCL device to the specified device number /** A context and default command queue will be created for the device * @@ -176,8 +179,8 @@ class UCL_Device { #ifdef CL_VERSION_2_0 if (_cq_profiling) { - cl_queue_properties props[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, - 0}; + cl_queue_properties props[] = {CL_QUEUE_PROPERTIES, + CL_QUEUE_PROFILING_ENABLE, 0}; _cq.back()=clCreateCommandQueueWithProperties(_context, _cl_device, props, &errorv); } else { @@ -187,8 +190,8 @@ class UCL_Device { } #else if (_cq_profiling) - _cq.back()=clCreateCommandQueue(_context, _cl_device, CL_QUEUE_PROFILING_ENABLE, - &errorv); + _cq.back()=clCreateCommandQueue(_context, 
_cl_device, + CL_QUEUE_PROFILING_ENABLE, &errorv); else _cq.back()=clCreateCommandQueue(_context, _cl_device, 0, &errorv); #endif @@ -403,7 +406,11 @@ class UCL_Device { // Grabs the properties for all devices UCL_Device::UCL_Device() { _device=-1; + #ifndef GERYON_NO_OCL_MARKERS _cq_profiling=true; + #else + _cq_profiling=false; + #endif // --- Get Number of Platforms cl_uint nplatforms; @@ -482,6 +489,7 @@ int UCL_Device::set_platform(int pid) { _num_devices = 0; for (int i=0; i 1) { - cl_device_id *subdevice_list = new cl_device_id[num_subdevices]; - CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, num_subdevices, - subdevice_list, &num_subdevices)); - for (cl_uint j=0; j 1) { + subdevice_list = new cl_device_id[num_subdevices]; + err = clCreateSubDevices(device_list[i], props, num_subdevices, + subdevice_list, &num_subdevices); + if (err != CL_SUCCESS) { + delete[] subdevice_list; + num_subdevices = 1; + subdevice_list = device_list + i; } - delete[] subdevice_list; - } else { - _cl_devices.push_back(device_list[i]); - add_properties(device_list[i]); - _num_devices++; } #endif + + for (cl_uint j=0; j 1) delete[] subdevice_list; } // for i #endif @@ -686,10 +698,10 @@ void UCL_Device::add_properties(cl_device_id device_list) { double arch = static_cast(minor)/10+major; if (arch >= 3.0) op.has_shuffle_support=true; - op.shared_main_memory=_shared_mem_device(device_list); } delete[] buffer2; #endif + op.shared_main_memory=_shared_mem_device(device_list); _properties.push_back(op); } diff --git a/lib/gpu/geryon/ocl_timer.h b/lib/gpu/geryon/ocl_timer.h index bd77170ed9..189871e631 100644 --- a/lib/gpu/geryon/ocl_timer.h +++ b/lib/gpu/geryon/ocl_timer.h @@ -27,11 +27,15 @@ #include "ocl_macros.h" #include "ocl_device.h" +#ifndef GERYON_NO_OCL_MARKERS #ifdef CL_VERSION_1_2 #define UCL_OCL_MARKER(cq,event) clEnqueueMarkerWithWaitList(cq,0,nullptr,event) #else #define UCL_OCL_MARKER clEnqueueMarker #endif +#else +#define UCL_OCL_MARKER(cq,event) +#endif namespace 
ucl_opencl { @@ -51,8 +55,10 @@ class UCL_Timer { inline void clear() { if (_initialized) { if (has_measured_time) { + #ifndef GERYON_NO_OCL_MARKERS clReleaseEvent(start_event); clReleaseEvent(stop_event); + #endif has_measured_time = false; } CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq)); @@ -76,8 +82,10 @@ class UCL_Timer { /// Start timing on default command queue inline void start() { if (has_measured_time) { + #ifndef GERYON_NO_OCL_MARKERS clReleaseEvent(start_event); clReleaseEvent(stop_event); + #endif has_measured_time = false; } UCL_OCL_MARKER(_cq,&start_event); @@ -91,17 +99,26 @@ class UCL_Timer { /// Block until the start event has been reached on device inline void sync_start() { + #ifndef GERYON_NO_OCL_MARKERS + CL_SAFE_CALL(clWaitForEvents(1,&start_event)); if (has_measured_time) { clReleaseEvent(start_event); clReleaseEvent(stop_event); has_measured_time = false; } - CL_SAFE_CALL(clWaitForEvents(1,&start_event)); + #else + CL_SAFE_CALL(clFinish(_cq)); + has_measured_time = false; + #endif } /// Block until the stop event has been reached on device inline void sync_stop() { + #ifndef GERYON_NO_OCL_MARKERS CL_SAFE_CALL(clWaitForEvents(1,&stop_event)); + #else + CL_SAFE_CALL(clFinish(_cq)); + #endif has_measured_time = true; } @@ -126,6 +143,7 @@ class UCL_Timer { /// Return the time (ms) of last start to stop - Forces synchronization inline double time() { if(!has_measured_time) return 0.0; + #ifndef GERYON_NO_OCL_MARKERS cl_ulong tstart,tend; CL_SAFE_CALL(clWaitForEvents(1,&stop_event)); CL_SAFE_CALL(clGetEventProfilingInfo(stop_event, @@ -138,6 +156,11 @@ class UCL_Timer { clReleaseEvent(stop_event); has_measured_time = false; return (tend-tstart)*1e-6; + #else + CL_SAFE_CALL(clFinish(_cq)); + has_measured_time = false; + return 0.0; + #endif } /// Return the time (s) of last start to stop - Forces synchronization diff --git a/lib/gpu/lal_beck_ext.cpp b/lib/gpu/lal_beck_ext.cpp index ab65237e27..d4be986ff5 100644 --- a/lib/gpu/lal_beck_ext.cpp +++ 
b/lib/gpu/lal_beck_ext.cpp @@ -76,7 +76,7 @@ int beck_gpu_init(const int ntypes, double **cutsq, double **aa, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - BLMF.device->gpu_barrier(); + BLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_born_coul_long_cs_ext.cpp b/lib/gpu/lal_born_coul_long_cs_ext.cpp index fc6b89692f..3d6383acca 100644 --- a/lib/gpu/lal_born_coul_long_cs_ext.cpp +++ b/lib/gpu/lal_born_coul_long_cs_ext.cpp @@ -84,7 +84,7 @@ int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, gpu_split, screen, host_cut_ljsq, host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); - BCLCSMF.device->gpu_barrier(); + BCLCSMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_born_coul_long_ext.cpp b/lib/gpu/lal_born_coul_long_ext.cpp index 9d17f2fa7d..6975f5237e 100644 --- a/lib/gpu/lal_born_coul_long_ext.cpp +++ b/lib/gpu/lal_born_coul_long_ext.cpp @@ -84,7 +84,7 @@ int borncl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, gpu_split, screen, host_cut_ljsq, host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); - BORNCLMF.device->gpu_barrier(); + BORNCLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_born_coul_wolf_cs_ext.cpp b/lib/gpu/lal_born_coul_wolf_cs_ext.cpp index ae162a7c52..be07504727 100644 --- a/lib/gpu/lal_born_coul_wolf_cs_ext.cpp +++ b/lib/gpu/lal_born_coul_wolf_cs_ext.cpp @@ -86,7 +86,7 @@ int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, host_cut_coulsq, host_special_coul, qqrd2e, alf, e_shift, f_shift); - BornCWCST.device->gpu_barrier(); + BornCWCST.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_born_coul_wolf_ext.cpp b/lib/gpu/lal_born_coul_wolf_ext.cpp index bc38db1b9c..a3e8fd44f2 100644 --- a/lib/gpu/lal_born_coul_wolf_ext.cpp +++ b/lib/gpu/lal_born_coul_wolf_ext.cpp 
@@ -86,7 +86,7 @@ int borncw_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, host_cut_coulsq, host_special_coul, qqrd2e, alf, e_shift, f_shift); - BORNCWMF.device->gpu_barrier(); + BORNCWMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_born_ext.cpp b/lib/gpu/lal_born_ext.cpp index 2321a1264d..ca2b79865a 100644 --- a/lib/gpu/lal_born_ext.cpp +++ b/lib/gpu/lal_born_ext.cpp @@ -80,7 +80,7 @@ int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - BORNMF.device->gpu_barrier(); + BORNMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -114,7 +114,7 @@ void born_gpu_reinit(const int ntypes, double **host_rhoinv, BORNMF.reinit(ntypes, host_rhoinv, host_born1, host_born2, host_born3, host_a, host_c, host_d, offset); - BORNMF.device->gpu_barrier(); + BORNMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_buck_coul_ext.cpp b/lib/gpu/lal_buck_coul_ext.cpp index 9cf8f9b00e..cb07daecc4 100644 --- a/lib/gpu/lal_buck_coul_ext.cpp +++ b/lib/gpu/lal_buck_coul_ext.cpp @@ -83,7 +83,7 @@ int buckc_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, host_cut_ljsq, host_cut_coulsq, host_special_coul, qqrd2e); - BUCKCMF.device->gpu_barrier(); + BUCKCMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_buck_coul_long_ext.cpp b/lib/gpu/lal_buck_coul_long_ext.cpp index 393ccc3feb..3307b8f5bc 100644 --- a/lib/gpu/lal_buck_coul_long_ext.cpp +++ b/lib/gpu/lal_buck_coul_long_ext.cpp @@ -82,7 +82,7 @@ int buckcl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, maxspecial, cell_size, gpu_split, screen, host_cut_ljsq, host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); - BUCKCLMF.device->gpu_barrier(); + BUCKCLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_buck_ext.cpp 
b/lib/gpu/lal_buck_ext.cpp index 738b33337d..70915cb227 100644 --- a/lib/gpu/lal_buck_ext.cpp +++ b/lib/gpu/lal_buck_ext.cpp @@ -77,7 +77,7 @@ int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, host_a, host_c, offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - BUCKMF.device->gpu_barrier(); + BUCKMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -110,7 +110,7 @@ void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv, BUCKMF.reinit(ntypes, cutsq, host_rhoinv, host_buck1, host_buck2, host_a, host_c, offset); - BUCKMF.device->gpu_barrier(); + BUCKMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_charmm_ext.cpp b/lib/gpu/lal_charmm_ext.cpp index bed2f21933..471599b1b1 100644 --- a/lib/gpu/lal_charmm_ext.cpp +++ b/lib/gpu/lal_charmm_ext.cpp @@ -88,7 +88,7 @@ int crm_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, qqrd2e, cut_lj_innersq, cut_coul_innersq, denom_lj, denom_coul, epsilon, sigma, mix_arithmetic); - CRMMF.device->gpu_barrier(); + CRMMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_charmm_long_ext.cpp b/lib/gpu/lal_charmm_long_ext.cpp index 13565f5682..dfe3d99383 100644 --- a/lib/gpu/lal_charmm_long_ext.cpp +++ b/lib/gpu/lal_charmm_long_ext.cpp @@ -86,7 +86,7 @@ int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, qqrd2e, g_ewald, cut_lj_innersq, denom_lj, epsilon, sigma, mix_arithmetic); - CRMLMF.device->gpu_barrier(); + CRMLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_colloid_ext.cpp b/lib/gpu/lal_colloid_ext.cpp index dcfd1a6d34..cfd050e486 100644 --- a/lib/gpu/lal_colloid_ext.cpp +++ b/lib/gpu/lal_colloid_ext.cpp @@ -83,7 +83,7 @@ int colloid_gpu_init(const int ntypes, double **cutsq, double **host_lj1, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - COLLMF.device->gpu_barrier(); + 
COLLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_coul_debye_ext.cpp b/lib/gpu/lal_coul_debye_ext.cpp index 516dca5df8..fbe319d499 100644 --- a/lib/gpu/lal_coul_debye_ext.cpp +++ b/lib/gpu/lal_coul_debye_ext.cpp @@ -74,7 +74,7 @@ int cdebye_gpu_init(const int ntypes, double **host_scale, double **cutsq, init_ok=CDEMF.init(ntypes, host_scale, cutsq, host_special_coul, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen, qqrd2e, kappa); - CDEMF.device->gpu_barrier(); + CDEMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -103,7 +103,7 @@ void cdebye_gpu_reinit(const int ntypes, double **host_scale) { if (gpu_rank==i && world_me!=0) CDEMF.reinit(ntypes, host_scale); - CDEMF.device->gpu_barrier(); + CDEMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_coul_dsf_ext.cpp b/lib/gpu/lal_coul_dsf_ext.cpp index e21c70ae4b..9654d1e1a3 100644 --- a/lib/gpu/lal_coul_dsf_ext.cpp +++ b/lib/gpu/lal_coul_dsf_ext.cpp @@ -77,7 +77,7 @@ int cdsf_gpu_init(const int ntypes, const int inum, const int nall, gpu_split, screen, host_cut_coulsq, host_special_coul, qqrd2e, e_shift, f_shift, alpha); - CDMF.device->gpu_barrier(); + CDMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_coul_ext.cpp b/lib/gpu/lal_coul_ext.cpp index 370c186123..91d4433e9a 100644 --- a/lib/gpu/lal_coul_ext.cpp +++ b/lib/gpu/lal_coul_ext.cpp @@ -74,7 +74,7 @@ int coul_gpu_init(const int ntypes, double **host_scale, init_ok=COULMF.init(ntypes, host_scale, cutsq, special_coul, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen, qqrd2e); - COULMF.device->gpu_barrier(); + COULMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -103,7 +103,7 @@ void coul_gpu_reinit(const int ntypes, double **host_scale) { if (gpu_rank==i && world_me!=0) COULMF.reinit(ntypes, host_scale); - COULMF.device->gpu_barrier(); + COULMF.device->serialize_init(); } } diff --git 
a/lib/gpu/lal_coul_long_cs_ext.cpp b/lib/gpu/lal_coul_long_cs_ext.cpp index df92619f2f..be4630516c 100644 --- a/lib/gpu/lal_coul_long_cs_ext.cpp +++ b/lib/gpu/lal_coul_long_cs_ext.cpp @@ -76,7 +76,7 @@ int clcs_gpu_init(const int ntypes, double **host_scale, cell_size, gpu_split, screen, host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); - CLCSMF.device->gpu_barrier(); + CLCSMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -105,7 +105,7 @@ void clcs_gpu_reinit(const int ntypes, double **host_scale) { if (gpu_rank==i && world_me!=0) CLCSMF.reinit(ntypes, host_scale); - CLCSMF.device->gpu_barrier(); + CLCSMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_coul_long_ext.cpp b/lib/gpu/lal_coul_long_ext.cpp index 1d9dcfdeca..034d23f507 100644 --- a/lib/gpu/lal_coul_long_ext.cpp +++ b/lib/gpu/lal_coul_long_ext.cpp @@ -76,7 +76,7 @@ int cl_gpu_init(const int ntypes, double **host_scale, cell_size, gpu_split, screen, host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); - CLMF.device->gpu_barrier(); + CLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -105,7 +105,7 @@ void cl_gpu_reinit(const int ntypes, double **host_scale) { if (gpu_rank==i && world_me!=0) CLMF.reinit(ntypes, host_scale); - CLMF.device->gpu_barrier(); + CLMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 5438df2a78..c28404ab78 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -328,7 +328,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu, for (int i=0; i<_procs_per_gpu; i++) { if (_gpu_rank==i) flag=compile_kernels(); - gpu_barrier(); + serialize_init(); } // check if double precision support is available @@ -609,6 +609,10 @@ void DeviceT::init_message(FILE *screen, const char *name, int last=last_gpu+1; if (last>gpu->num_devices()) last=gpu->num_devices(); + if (gpu->num_platforms()>1) { + std::string pname=gpu->platform_name(); + 
fprintf(screen,"Platform: %s\n",pname.c_str()); + } for (int i=first_gpu; igpu_barrier(); + DPLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_dipole_lj_sf_ext.cpp b/lib/gpu/lal_dipole_lj_sf_ext.cpp index 0879702887..69469630b1 100644 --- a/lib/gpu/lal_dipole_lj_sf_ext.cpp +++ b/lib/gpu/lal_dipole_lj_sf_ext.cpp @@ -80,7 +80,7 @@ int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1, cell_size, gpu_split, screen, host_cut_ljsq, host_cut_coulsq, host_special_coul, qqrd2e); - DPLSFMF.device->gpu_barrier(); + DPLSFMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_dipole_long_lj_ext.cpp b/lib/gpu/lal_dipole_long_lj_ext.cpp index fd61706ba9..9ba5f3ccf3 100644 --- a/lib/gpu/lal_dipole_long_lj_ext.cpp +++ b/lib/gpu/lal_dipole_long_lj_ext.cpp @@ -81,7 +81,7 @@ int dplj_gpu_init(const int ntypes, double **cutsq, double **host_lj1, cell_size, gpu_split, screen, host_cut_ljsq, host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); - DPLJMF.device->gpu_barrier(); + DPLJMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_dpd_ext.cpp b/lib/gpu/lal_dpd_ext.cpp index 7637ff03c0..2f7ef3e7e2 100644 --- a/lib/gpu/lal_dpd_ext.cpp +++ b/lib/gpu/lal_dpd_ext.cpp @@ -76,7 +76,7 @@ int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0, host_cut, special_lj, false, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - DPDMF.device->gpu_barrier(); + DPDMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_dpd_tstat_ext.cpp b/lib/gpu/lal_dpd_tstat_ext.cpp index 0e60e3fccc..2b63bf62e7 100644 --- a/lib/gpu/lal_dpd_tstat_ext.cpp +++ b/lib/gpu/lal_dpd_tstat_ext.cpp @@ -76,7 +76,7 @@ int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0, host_cut, special_lj, true, inum, nall, 300, maxspecial, cell_size, gpu_split, screen); - DPDTMF.device->gpu_barrier(); + 
DPDTMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_eam_alloy_ext.cpp b/lib/gpu/lal_eam_alloy_ext.cpp index 5a3dfb9d6d..488692bd2f 100644 --- a/lib/gpu/lal_eam_alloy_ext.cpp +++ b/lib/gpu/lal_eam_alloy_ext.cpp @@ -90,7 +90,7 @@ int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq, nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - EAMALMF.device->gpu_barrier(); + EAMALMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_eam_ext.cpp b/lib/gpu/lal_eam_ext.cpp index a884335bd9..f3e16a9589 100644 --- a/lib/gpu/lal_eam_ext.cpp +++ b/lib/gpu/lal_eam_ext.cpp @@ -90,7 +90,7 @@ int eam_gpu_init(const int ntypes, double host_cutforcesq, nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - EAMMF.device->gpu_barrier(); + EAMMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_eam_fs_ext.cpp b/lib/gpu/lal_eam_fs_ext.cpp index 5aad871237..72bf439a4d 100644 --- a/lib/gpu/lal_eam_fs_ext.cpp +++ b/lib/gpu/lal_eam_fs_ext.cpp @@ -90,7 +90,7 @@ int eam_fs_gpu_init(const int ntypes, double host_cutforcesq, nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - EAMFSMF.device->gpu_barrier(); + EAMFSMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_gauss_ext.cpp b/lib/gpu/lal_gauss_ext.cpp index afec2e86f2..2e00223ac9 100644 --- a/lib/gpu/lal_gauss_ext.cpp +++ b/lib/gpu/lal_gauss_ext.cpp @@ -76,7 +76,7 @@ int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a, offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - GLMF.device->gpu_barrier(); + GLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -106,7 +106,7 @@ void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a, if (gpu_rank==i && world_me!=0) 
GLMF.reinit(ntypes, cutsq, host_a, host_b, offset); - GLMF.device->gpu_barrier(); + GLMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_gayberne_ext.cpp b/lib/gpu/lal_gayberne_ext.cpp index 56aad61632..864da8e7ad 100644 --- a/lib/gpu/lal_gayberne_ext.cpp +++ b/lib/gpu/lal_gayberne_ext.cpp @@ -83,7 +83,7 @@ int gb_gpu_init(const int ntypes, const double gamma, host_lj3, host_lj4, offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - GBMF.device->gpu_barrier(); + GBMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_lj96_ext.cpp b/lib/gpu/lal_lj96_ext.cpp index be7ffc5a09..b8d191d31e 100644 --- a/lib/gpu/lal_lj96_ext.cpp +++ b/lib/gpu/lal_lj96_ext.cpp @@ -76,7 +76,7 @@ int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1, offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - LJ96MF.device->gpu_barrier(); + LJ96MF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_lj_class2_long_ext.cpp b/lib/gpu/lal_lj_class2_long_ext.cpp index 311b027536..66eda209c1 100644 --- a/lib/gpu/lal_lj_class2_long_ext.cpp +++ b/lib/gpu/lal_lj_class2_long_ext.cpp @@ -81,7 +81,7 @@ int c2cl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, cell_size, gpu_split, screen, host_cut_ljsq, host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); - C2CLMF.device->gpu_barrier(); + C2CLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_lj_coul_debye_ext.cpp b/lib/gpu/lal_lj_coul_debye_ext.cpp index 4f81b01457..c6c9baef0f 100644 --- a/lib/gpu/lal_lj_coul_debye_ext.cpp +++ b/lib/gpu/lal_lj_coul_debye_ext.cpp @@ -81,7 +81,7 @@ int ljcd_gpu_init(const int ntypes, double **cutsq, double **host_lj1, cell_size, gpu_split, screen, host_cut_ljsq, host_cut_coulsq, host_special_coul, qqrd2e, kappa); - LJCDMF.device->gpu_barrier(); + LJCDMF.device->serialize_init(); if (message) 
fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_lj_coul_ext.cpp b/lib/gpu/lal_lj_coul_ext.cpp index 5b7f97e630..4011f4bb8c 100644 --- a/lib/gpu/lal_lj_coul_ext.cpp +++ b/lib/gpu/lal_lj_coul_ext.cpp @@ -80,7 +80,7 @@ int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1, cell_size, gpu_split, screen, host_cut_ljsq, host_cut_coulsq, host_special_coul, qqrd2e); - LJCMF.device->gpu_barrier(); + LJCMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_lj_coul_long_ext.cpp b/lib/gpu/lal_lj_coul_long_ext.cpp index 6a027bdc7e..578e38e4a0 100644 --- a/lib/gpu/lal_lj_coul_long_ext.cpp +++ b/lib/gpu/lal_lj_coul_long_ext.cpp @@ -81,7 +81,7 @@ int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, cell_size, gpu_split, screen, host_cut_ljsq, host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); - LJCLMF.device->gpu_barrier(); + LJCLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -112,7 +112,7 @@ void ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, if (gpu_rank==i && world_me!=0) LJCLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_cut_ljsq); - LJCLMF.device->gpu_barrier(); + LJCLMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_lj_coul_msm_ext.cpp b/lib/gpu/lal_lj_coul_msm_ext.cpp index 2d9d77fe77..05b1991ff9 100644 --- a/lib/gpu/lal_lj_coul_msm_ext.cpp +++ b/lib/gpu/lal_lj_coul_msm_ext.cpp @@ -83,7 +83,7 @@ int ljcm_gpu_init(const int ntypes, double **cutsq, double **host_lj1, cell_size, gpu_split, screen, host_cut_ljsq, host_cut_coulsq, host_special_coul, order, qqrd2e); - LJCMLMF.device->gpu_barrier(); + LJCMLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_lj_cubic_ext.cpp b/lib/gpu/lal_lj_cubic_ext.cpp index 2f8ebac37b..d11199cee9 100644 --- a/lib/gpu/lal_lj_cubic_ext.cpp +++ b/lib/gpu/lal_lj_cubic_ext.cpp @@ -80,7 +80,7 @@ int ljcb_gpu_init(const int ntypes, double 
**cutsq, double **cut_inner_sq, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - LJCubicLMF.device->gpu_barrier(); + LJCubicLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_lj_dsf_ext.cpp b/lib/gpu/lal_lj_dsf_ext.cpp index e70059261c..c1e5b2de38 100644 --- a/lib/gpu/lal_lj_dsf_ext.cpp +++ b/lib/gpu/lal_lj_dsf_ext.cpp @@ -84,7 +84,7 @@ int ljd_gpu_init(const int ntypes, double **cutsq, double **host_lj1, host_cut_coulsq, host_special_coul, qqrd2e, e_shift, f_shift, alpha); - LJDMF.device->gpu_barrier(); + LJDMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_lj_expand_coul_long_ext.cpp b/lib/gpu/lal_lj_expand_coul_long_ext.cpp index e5506dd7aa..a6a04d7cc8 100644 --- a/lib/gpu/lal_lj_expand_coul_long_ext.cpp +++ b/lib/gpu/lal_lj_expand_coul_long_ext.cpp @@ -81,7 +81,7 @@ int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, cell_size, gpu_split, screen, host_cut_ljsq, host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); - LJECLMF.device->gpu_barrier(); + LJECLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -112,7 +112,7 @@ void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, if (gpu_rank==i && world_me!=0) LJECLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, shift, host_cut_ljsq); - LJECLMF.device->gpu_barrier(); + LJECLMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_lj_expand_ext.cpp b/lib/gpu/lal_lj_expand_ext.cpp index 02decf2712..7d2e0fd306 100644 --- a/lib/gpu/lal_lj_expand_ext.cpp +++ b/lib/gpu/lal_lj_expand_ext.cpp @@ -108,7 +108,7 @@ void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, if (gpu_rank==i && world_me!=0) LJEMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, shift); - LJEMF.device->gpu_barrier(); + LJEMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_lj_ext.cpp 
b/lib/gpu/lal_lj_ext.cpp index fa00fc4f64..9a1d03780d 100644 --- a/lib/gpu/lal_lj_ext.cpp +++ b/lib/gpu/lal_lj_ext.cpp @@ -76,7 +76,7 @@ int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - LJLMF.device->gpu_barrier(); + LJLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -105,7 +105,7 @@ void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, for (int i=0; igpu_barrier(); + LJLMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_lj_gromacs_ext.cpp b/lib/gpu/lal_lj_gromacs_ext.cpp index 19d1d12513..289b6a30a3 100644 --- a/lib/gpu/lal_lj_gromacs_ext.cpp +++ b/lib/gpu/lal_lj_gromacs_ext.cpp @@ -81,7 +81,7 @@ int ljgrm_gpu_init(const int ntypes, double **cutsq, double **host_lj1, gpu_split, screen, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, host_ljsw5, cut_inner, cut_inner_sq); - LJGRMMF.device->gpu_barrier(); + LJGRMMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_lj_smooth_ext.cpp b/lib/gpu/lal_lj_smooth_ext.cpp index 48dad74071..5d392d163f 100644 --- a/lib/gpu/lal_lj_smooth_ext.cpp +++ b/lib/gpu/lal_lj_smooth_ext.cpp @@ -80,7 +80,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1, cell_size, gpu_split, screen, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq); - LJSMTMF.device->gpu_barrier(); + LJSMTMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -110,7 +110,7 @@ void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, for (int i=0; igpu_barrier(); + LJSMTMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_lj_spica_ext.cpp b/lib/gpu/lal_lj_spica_ext.cpp index 1467c1806a..119e40ee33 100644 --- a/lib/gpu/lal_lj_spica_ext.cpp +++ b/lib/gpu/lal_lj_spica_ext.cpp @@ -77,7 +77,7 @@ int spica_gpu_init(const int ntypes, double **cutsq, int **cg_types, host_lj4, 
offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - CMMMF.device->gpu_barrier(); + CMMMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_lj_spica_long_ext.cpp b/lib/gpu/lal_lj_spica_long_ext.cpp index 293801391f..298fde2fd5 100644 --- a/lib/gpu/lal_lj_spica_long_ext.cpp +++ b/lib/gpu/lal_lj_spica_long_ext.cpp @@ -81,7 +81,7 @@ int spical_gpu_init(const int ntypes, double **cutsq, int **cg_type, maxspecial, cell_size, gpu_split, screen, host_cut_ljsq, host_cut_coulsq, host_special_coul, qqrd2e, g_ewald); - CMMLMF.device->gpu_barrier(); + CMMLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_lj_tip4p_long_ext.cpp b/lib/gpu/lal_lj_tip4p_long_ext.cpp index 7395506c2d..1733772618 100644 --- a/lib/gpu/lal_lj_tip4p_long_ext.cpp +++ b/lib/gpu/lal_lj_tip4p_long_ext.cpp @@ -89,7 +89,7 @@ int ljtip4p_long_gpu_init(const int ntypes, double **cutsq, double **host_lj1, host_special_coul, qqrd2e, g_ewald, map_size, max_same); - LJTIP4PLMF.device->gpu_barrier(); + LJTIP4PLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_mie_ext.cpp b/lib/gpu/lal_mie_ext.cpp index 5cbb9c29d2..a7b549139a 100644 --- a/lib/gpu/lal_mie_ext.cpp +++ b/lib/gpu/lal_mie_ext.cpp @@ -80,7 +80,7 @@ int mie_gpu_init(const int ntypes, double **cutsq, double **host_mie1, offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - MLMF.device->gpu_barrier(); + MLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_morse_ext.cpp b/lib/gpu/lal_morse_ext.cpp index f43676a1b5..41c1076f7d 100644 --- a/lib/gpu/lal_morse_ext.cpp +++ b/lib/gpu/lal_morse_ext.cpp @@ -77,7 +77,7 @@ int mor_gpu_init(const int ntypes, double **cutsq, offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - MORMF.device->gpu_barrier(); + MORMF.device->serialize_init(); if 
(message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_pppm_ext.cpp b/lib/gpu/lal_pppm_ext.cpp index cf009b4479..bdb83378f7 100644 --- a/lib/gpu/lal_pppm_ext.cpp +++ b/lib/gpu/lal_pppm_ext.cpp @@ -81,7 +81,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall, vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm, split,success); - pppm.device->gpu_barrier(); + pppm.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_re_squared_ext.cpp b/lib/gpu/lal_re_squared_ext.cpp index e1eb8a45b0..17c508f1f1 100644 --- a/lib/gpu/lal_re_squared_ext.cpp +++ b/lib/gpu/lal_re_squared_ext.cpp @@ -80,7 +80,7 @@ int re_gpu_init(const int ntypes, double **shape, double **well, double **cutsq, host_lj4, offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - REMF.device->gpu_barrier(); + REMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_soft_ext.cpp b/lib/gpu/lal_soft_ext.cpp index a32a5e5a00..879150ddd9 100644 --- a/lib/gpu/lal_soft_ext.cpp +++ b/lib/gpu/lal_soft_ext.cpp @@ -76,7 +76,7 @@ int soft_gpu_init(const int ntypes, double **cutsq, double **host_prefactor, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - SLMF.device->gpu_barrier(); + SLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -106,7 +106,7 @@ void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor, if (gpu_rank==i && world_me!=0) SLMF.reinit(ntypes, cutsq, host_prefactor, host_cut); - SLMF.device->gpu_barrier(); + SLMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_sw_ext.cpp b/lib/gpu/lal_sw_ext.cpp index 5158f135a3..5aa3af8757 100644 --- a/lib/gpu/lal_sw_ext.cpp +++ b/lib/gpu/lal_sw_ext.cpp @@ -84,7 +84,7 @@ int sw_gpu_init(const int ntypes, const int inum, const int nall, sigma_gamma, c1, c2, c3, c4, c5, c6, lambda_epsilon, costheta, map, e2param); - SWMF.device->gpu_barrier(); + 
SWMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_table_ext.cpp b/lib/gpu/lal_table_ext.cpp index 6237c4d7cd..8fc118c770 100644 --- a/lib/gpu/lal_table_ext.cpp +++ b/lib/gpu/lal_table_ext.cpp @@ -76,7 +76,7 @@ int table_gpu_init(const int ntypes, double **cutsq, double ***table_coeffs, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen, tabstyle, ntables, tablength); - TBMF.device->gpu_barrier(); + TBMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_tersoff_ext.cpp b/lib/gpu/lal_tersoff_ext.cpp index ac700d014a..5092809344 100644 --- a/lib/gpu/lal_tersoff_ext.cpp +++ b/lib/gpu/lal_tersoff_ext.cpp @@ -91,7 +91,7 @@ int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h, ts_gamma, ts_beta, ts_powern, ts_cutsq); - TSMF.device->gpu_barrier(); + TSMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_tersoff_mod_ext.cpp b/lib/gpu/lal_tersoff_mod_ext.cpp index cac284fb70..7310191a5d 100644 --- a/lib/gpu/lal_tersoff_mod_ext.cpp +++ b/lib/gpu/lal_tersoff_mod_ext.cpp @@ -91,7 +91,7 @@ int tersoff_mod_gpu_init(const int ntypes, const int inum, const int nall, ts_c3, ts_c4, ts_c5, ts_h, ts_beta, ts_powern, ts_powern_del, ts_ca1, ts_cutsq); - TSMMF.device->gpu_barrier(); + TSMMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_tersoff_zbl_ext.cpp b/lib/gpu/lal_tersoff_zbl_ext.cpp index 518b535627..ad3538b464 100644 --- a/lib/gpu/lal_tersoff_zbl_ext.cpp +++ b/lib/gpu/lal_tersoff_zbl_ext.cpp @@ -102,7 +102,7 @@ int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall, ts_ZBLcut, ts_ZBLexpscale, global_e, global_a_0, global_epsilon_0, ts_cutsq); - TSZMF.device->gpu_barrier(); + TSZMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_ufm_ext.cpp 
b/lib/gpu/lal_ufm_ext.cpp index 432cbb2e63..3f6740f934 100644 --- a/lib/gpu/lal_ufm_ext.cpp +++ b/lib/gpu/lal_ufm_ext.cpp @@ -78,7 +78,7 @@ int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1, offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - UFMLMF.device->gpu_barrier(); + UFMLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } @@ -106,7 +106,7 @@ void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1, for (int i=0; igpu_barrier(); + UFMLMF.device->serialize_init(); } } diff --git a/lib/gpu/lal_vashishta_ext.cpp b/lib/gpu/lal_vashishta_ext.cpp index ecbdefed19..2567688a74 100644 --- a/lib/gpu/lal_vashishta_ext.cpp +++ b/lib/gpu/lal_vashishta_ext.cpp @@ -89,7 +89,7 @@ int vashishta_gpu_init(const int ntypes, const int inum, const int nall, const i lam4inv, zizj, mbigd, dvrc, big6w, heta, bigh, bigw, c0, costheta, bigb, big2b, bigc); - VashishtaMF.device->gpu_barrier(); + VashishtaMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_yukawa_colloid_ext.cpp b/lib/gpu/lal_yukawa_colloid_ext.cpp index db86f91689..990a208c3c 100644 --- a/lib/gpu/lal_yukawa_colloid_ext.cpp +++ b/lib/gpu/lal_yukawa_colloid_ext.cpp @@ -76,7 +76,7 @@ int ykcolloid_gpu_init(const int ntypes, double **cutsq, double **host_a, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen, kappa); - YKCOLLMF.device->gpu_barrier(); + YKCOLLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/gpu/lal_yukawa_ext.cpp b/lib/gpu/lal_yukawa_ext.cpp index cf2bf89e3d..062bf8d101 100644 --- a/lib/gpu/lal_yukawa_ext.cpp +++ b/lib/gpu/lal_yukawa_ext.cpp @@ -76,7 +76,7 @@ int yukawa_gpu_init(const int ntypes, double **cutsq, double kappa, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - YKMF.device->gpu_barrier(); + YKMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git 
a/lib/gpu/lal_zbl_ext.cpp b/lib/gpu/lal_zbl_ext.cpp index ee7794af2d..e052380546 100644 --- a/lib/gpu/lal_zbl_ext.cpp +++ b/lib/gpu/lal_zbl_ext.cpp @@ -79,7 +79,7 @@ int zbl_gpu_init(const int ntypes, double **cutsq, double **host_sw1, cut_globalsq, cut_innersq, cut_inner, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen); - ZBLMF.device->gpu_barrier(); + ZBLMF.device->serialize_init(); if (message) fprintf(screen,"Done.\n"); } diff --git a/lib/kokkos/BUILD.md b/lib/kokkos/BUILD.md index 114baf99f1..a8985ef1fd 100644 --- a/lib/kokkos/BUILD.md +++ b/lib/kokkos/BUILD.md @@ -27,7 +27,7 @@ When configuring your project just set: -DKokkos_ROOT=${kokkos_install_prefix} \ -DCMAKE_CXX_COMPILER=${compiler_used_to_build_kokkos} ```` -Note: You may need the following if using some versions of CMake (e.g. 3.12): +Note: You may need the following if your project requires a minimum CMake version older than 3.12: ````cmake cmake_policy(SET CMP0074 NEW) ```` @@ -171,6 +171,9 @@ Options can be enabled by specifying `-DKokkos_ENABLE_X`. * Kokkos_ENABLE_HPX_ASYNC_DISPATCH * Whether HPX supports asynchronous dispatch * BOOL Default: OFF +* Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC + * Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2). This is an experimental performance feature and currently has issues when used with UCX. See https://github.com/kokkos/kokkos/issues/4228 for more details. + * BOOL Default: OFF * Kokkos_ENABLE_LARGE_MEM_TESTS * Whether to perform extra large memory tests * BOOL_Default: OFF @@ -235,6 +238,9 @@ The following options control `find_package` paths for CMake-based TPLs: ## Architecture Keywords Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_X`. 
+* Kokkos_ARCH_NATIVE + * Whether to optimize for the local CPU architecture + * BOOL Default: OFF * Kokkos_ARCH_AMDAVX * Whether to optimize for the AMDAVX architecture * BOOL Default: OFF @@ -310,12 +316,24 @@ Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_ * Kokkos_ARCH_POWER9 * Whether to optimize for the POWER9 architecture * BOOL Default: OFF +* Kokkos_ARCH_ICL + * Whether to optimize for the ICL architecture + * BOOL Default: OFF +* Kokkos_ARCH_ICX + * Whether to optimize for the ICX architecture + * BOOL Default: OFF +* Kokkos_ARCH_SKL + * Whether to optimize for the SKL architecture + * BOOL Default: OFF * Kokkos_ARCH_SKX * Whether to optimize for the SKX architecture * BOOL Default: OFF * Kokkos_ARCH_SNB * Whether to optimize for the SNB architecture * BOOL Default: OFF +* Kokkos_ARCH_SPR + * Whether to optimize for the SPR architecture + * BOOL Default: OFF * Kokkos_ARCH_TURING75 * Whether to optimize for the TURING75 architecture * BOOL Default: OFF diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index a908507704..e81f294451 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,157 @@ # Change Log +## [3.7.00](https://github.com/kokkos/kokkos/tree/3.7.00) (2022-08-22) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.01...3.7.00) + +### Features: +- Use non-volatile `join()` member functions and `operator+=` in `parallel_reduce/scan` [\#4931](https://github.com/kokkos/kokkos/pull/4931) [\#4954](https://github.com/kokkos/kokkos/pull/4954) [\#4951](https://github.com/kokkos/kokkos/pull/4951) +- Add `SIMD` sub package (requires C++17) [\#5016](https://github.com/kokkos/kokkos/pull/5016) +- Add `is_finalized()` [\#5247](https://github.com/kokkos/kokkos/pull/5247) +- Promote mathematical functions from `namespace Kokkos::Experimental` to `namespace Kokkos` [\#4791](https://github.com/kokkos/kokkos/pull/4791) +- Promote `min`, `max`, `clamp`, `minmax` functions from 
`namespace Kokkos::Experimental` to `namespace Kokkos` [\#5170](https://github.com/kokkos/kokkos/pull/5170) +- Add `round`, `logb`, `nextafter`, `copysign`, and `signbit` math functions [\#4768](https://github.com/kokkos/kokkos/pull/4768) +- Add `HIPManagedSpace`, similar to `CudaUVMSpace` [\#5112](https://github.com/kokkos/kokkos/pull/5112) +- Accept view construction allocation properties in `create_mirror[_view,_view_and_copy]` and `resize/realloc` [\#5125](https://github.com/kokkos/kokkos/pull/5125) [\#5095](https://github.com/kokkos/kokkos/pull/5095) [\#5035](https://github.com/kokkos/kokkos/pull/5035) [\#4805](https://github.com/kokkos/kokkos/pull/4805) [\#4844](https://github.com/kokkos/kokkos/pull/4844) +- Allow `MemorySpace::allocate()` to be called with execution space [\#4826](https://github.com/kokkos/kokkos/pull/4826) +- Experimental: Compile time view subscriber [\#4197](https://github.com/kokkos/kokkos/pull/4197) + +### Backends and Archs Enhancements: +- Add support for Sapphire Rapids Intel architecture [\#5015](https://github.com/kokkos/kokkos/pull/5015) +- Add support for ICX, SKL and ICL Intel architectures [\#5013](https://github.com/kokkos/kokkos/pull/5013) [\#4929](https://github.com/kokkos/kokkos/pull/4929) +- Add arch flags for Intel GPU Ponte Vecchio [\#4932](https://github.com/kokkos/kokkos/pull/4932) +- SYCL: require GPU if GPU architecture was set at configuration time (i.e. 
do not allow fallback to CPU device) [\#5264](https://github.com/kokkos/kokkos/pull/5264) [\#5222](https://github.com/kokkos/kokkos/pull/5222) +- SYCL: Add `SYCL::sycl_queue()` for interoperability [\#5241](https://github.com/kokkos/kokkos/pull/5241) +- SYCL: Loosen restriction for using built-in `sycl::group_broadcast` [\#4552](https://github.com/kokkos/kokkos/pull/4552) +- SYCL: preserve address space [\#4396](https://github.com/kokkos/kokkos/pull/4396) +- OpenMPTarget: Adding a workaround for team scan [\#5219](https://github.com/kokkos/kokkos/pull/5219) +- OpenMPTarget: Adding logic to skip the kernel launch if `league_size=0` [\#5067](https://github.com/kokkos/kokkos/pull/5067) +- OpenMPTarget: Make sure `Kokkos::abort()` causes abnormal program termination when called on the host-side [\#4808](https://github.com/kokkos/kokkos/pull/4808) +- HIP: Make HIPHostPinnedSpace coarse-grained [\#5152](https://github.com/kokkos/kokkos/pull/5152) +- Refactor OpenMP `parallel_for` implementation to use more native OpenMP constructs [\#4664](https://github.com/kokkos/kokkos/pull/4664) +- Add option to optimize for local CPU architecture `Kokkos_ARCH_NATIVE` [\#4930](https://github.com/kokkos/kokkos/pull/4930) + + +### Implemented enhancements +- Add command line argument/environment variable to print the configuration [\#5233](https://github.com/kokkos/kokkos/pull/5233) +- Improve error message in view memory access violations [\#4950](https://github.com/kokkos/kokkos/pull/4950) +- Remove unnecessary fences in View initialization [\#4823](https://github.com/kokkos/kokkos/pull/4823) +- Make `View::shmem_size()` device-callable [\#4936](https://github.com/kokkos/kokkos/pull/4936) +- Update numerics support for `__float128` [\#5081](https://github.com/kokkos/kokkos/pull/5081) +- Add `log10` overload for `Kokkos::complex` [\#5009](https://github.com/kokkos/kokkos/pull/5009) +- Add `[[nodiscard]]` to `ScopeGuard` [\#5224](https://github.com/kokkos/kokkos/pull/5224) +- Add 
structured binding support for `Kokkos::Array` [\#4962](https://github.com/kokkos/kokkos/pull/4962) +- Enable accessing `Kokkos::Array` elements in constant expressions [\#4916](https://github.com/kokkos/kokkos/pull/4916) +- Mark `as_view_of_rank_n` as KOKKOS_FUNCTION [\#5248](https://github.com/kokkos/kokkos/pull/5248) +- Cleanup/rework fence overloads [\#5148](https://github.com/kokkos/kokkos/pull/5148) +- Assert that `Layout` construction from extents is valid in functions taking integer extents [\#5209](https://github.com/kokkos/kokkos/pull/5209) +- Add `fill_random` overload that takes an execution space as first argument [\#5181](https://github.com/kokkos/kokkos/pull/5181) +- Avoid some unnecessary fences in `parallel_reduce/scan` [\#5154](https://github.com/kokkos/kokkos/pull/5154) +- Include `KOKKOS_ENABLE_LIBDL` in options when printing configuration [\#5086](https://github.com/kokkos/kokkos/pull/5086) +- DynRankView: make `layout()` return the same as a corresponding static View [\#5026](https://github.com/kokkos/kokkos/pull/5026) +- Use `_mm_malloc` for icpx [\#5012](https://github.com/kokkos/kokkos/pull/5012) +- Avoid forcing matching execution spaces in `BinSort` constructor and `sort()` [\#4919](https://github.com/kokkos/kokkos/pull/4919) +- Check number of bins in `BinSort` [\#4890](https://github.com/kokkos/kokkos/pull/4890) +- Improve performance in parallel STL-like algorithms [\#4887](https://github.com/kokkos/kokkos/pull/4887) [\#4886](https://github.com/kokkos/kokkos/pull/4886) +- Disable `memset` on A64FX and launch `parallel_for` instead (performance) [\#4884](https://github.com/kokkos/kokkos/pull/4884) +- Allow non-power-of-two team sizes for team reductions and scans [\#4809](https://github.com/kokkos/kokkos/pull/4809) + +#### Harmonization of Kokkos execution environment initialization: +- Warn when unable to detect local MPI rank and user explicitly asked for it [\#5263](https://github.com/kokkos/kokkos/pull/5263) +- Refactor parsing of 
command line arguments and environment variables [\#5221](https://github.com/kokkos/kokkos/pull/5221) +- Refactor device selection at initialization [\#5211](https://github.com/kokkos/kokkos/pull/5211) +- Rename tools settings for consistency [\#5201](https://github.com/kokkos/kokkos/pull/5201) +- Print help only once [\#5128](https://github.com/kokkos/kokkos/pull/5128) +- Update precedence rule in initialization [\#5130](https://github.com/kokkos/kokkos/pull/5130) +- Warn instead of just ignoring user settings when kokkos-tools is disabled [\#5088](https://github.com/kokkos/kokkos/pull/5088) +- Drop numa args in threads backend initialization [\#5127](https://github.com/kokkos/kokkos/pull/5127) +- Warn users when a flag prefixed with -[-]kokkos is not recognized and do not remove it [\#5256](https://github.com/kokkos/kokkos/pull/5256) +- Give back to Core what belongs to Core (aka moving tune_internals option from Tools back to Core) [\#5202](https://github.com/kokkos/kokkos/pull/5202) + +#### Build system updates: +- `nvcc_wrapper`: filter out -pedantic-errors from nvcc options [\#5235](https://github.com/kokkos/kokkos/pull/5235) +- `nvcc_wrapper`: add known nvcc option --source-in-ptx [\#5052](https://github.com/kokkos/kokkos/pull/5052) +- Link libdl as interface library [\#5179](https://github.com/kokkos/kokkos/pull/5179) +- Only show GPU architectures with enabled corresponding backend [\#5119](https://github.com/kokkos/kokkos/pull/5119) +- Enable optional external desul build [\#5021](https://github.com/kokkos/kokkos/pull/5021) [\#5132](https://github.com/kokkos/kokkos/pull/5132) +- Export `Kokkos_CXX_STANDARD` variable with CMake [\#5068](https://github.com/kokkos/kokkos/pull/5068) +- Suppress warnings with nvc++ [\#5031](https://github.com/kokkos/kokkos/pull/5031) +- Disallow multiple host architectures in CMake [\#4996](https://github.com/kokkos/kokkos/pull/4996) +- Do not include compiler warning flags in the compile option of the cmake target 
[\#4989](https://github.com/kokkos/kokkos/pull/4989) +- AOT flags for OpenMPTarget targeting Intel GPUs [\#4915](https://github.com/kokkos/kokkos/pull/4915) +- Repurpose `Kokkos_ARCH_INTEL_GEN` for SYCL to mean JIT to be conforming with OMPT [\#4894](https://github.com/kokkos/kokkos/pull/4894) +- Replace amdgpu-target with offload-arch [\#4874](https://github.com/kokkos/kokkos/pull/4874) +- Do not enable `kokkos_launch_compiler` when `CMAKE_CXX_COMPILER_LAUNCHER` is set [\#4870](https://github.com/kokkos/kokkos/pull/4870) +- Move CMake version check up [\#4797](https://github.com/kokkos/kokkos/pull/4797) + +### Incompatibilities: +- Remove `KOKKOS_THREAD_LOCAL` [\#5064](https://github.com/kokkos/kokkos/pull/5064) +- Remove `KOKKOS_ENABLE_POSIX_MEMALIGN` [\#5011](https://github.com/kokkos/kokkos/pull/5011) +- Remove unused `KOKKOS_ENABLE_TM` [\#4995](https://github.com/kokkos/kokkos/pull/4995) +- Remove unused cmakedefine `KOKKOS_ENABLE_COMPILER_WARNINGS` [\#4883](https://github.com/kokkos/kokkos/pull/4883) +- Remove unused `KOKKOS_ENABLE_DUALVIEW_MODIFY_CHECK` [\#4882](https://github.com/kokkos/kokkos/pull/4882) +- Drop Instruction Set Architecture (ISA) macros [\#4981](https://github.com/kokkos/kokkos/pull/4981) +- Warn in `ScopeGuard` about illegal usage [\#5250](https://github.com/kokkos/kokkos/pull/5250) + +### Deprecations: +- Guard against non-public header inclusion [\#5178](https://github.com/kokkos/kokkos/pull/5178) +- Raise deprecation warnings if non empty WorkTag class is used [\#5230](https://github.com/kokkos/kokkos/pull/5230) +- Deprecate `parallel_*` overloads taking the label as trailing argument [\#5141](https://github.com/kokkos/kokkos/pull/5141) +- Deprecate nested types in functional [\#5185](https://github.com/kokkos/kokkos/pull/5185) +- Deprecate `InitArguments` struct and replace it with `InitializationSettings` [\#5135](https://github.com/kokkos/kokkos/pull/5135) +- Deprecate `finalize_all()` 
[\#5134](https://github.com/kokkos/kokkos/pull/5134) +- Deprecate command line arguments (other than `--help`) that are not prefixed with `kokkos-*` [\#5120](https://github.com/kokkos/kokkos/pull/5120) +- Deprecate `--[kokkos-]numa` cmdline arg and `KOKKOS_NUMA` env var [\#5117](https://github.com/kokkos/kokkos/pull/5117) +- Deprecate `--[kokkos-]threads` command line argument in favor of `--[kokkos-]num-threads` [\#5111](https://github.com/kokkos/kokkos/pull/5111) +- Deprecate `Kokkos::common_view_alloc_prop` [\#5059](https://github.com/kokkos/kokkos/pull/5059) +- Deprecate `Kokkos::is_reducer_type` [\#4957](https://github.com/kokkos/kokkos/pull/4957) +- Deprecate `OffsetView` constructors taking `index_list_type` [\#4810](https://github.com/kokkos/kokkos/pull/4810) +- Deprecate overloads of `Kokkos::sort` taking a parameter `bool always_use_kokkos_sort` [\#5382](https://github.com/kokkos/kokkos/issues/5382) +- Warn about `parallel_reduce` cases that call `join()` with volatile-qualified arguments [\#5215](https://github.com/kokkos/kokkos/pull/5215) + +### Bug Fixes: +- CUDA Reductions: Fix data races reported by Nvidia `compute-sanitizer` [\#4855](https://github.com/kokkos/kokkos/pull/4855) +- Work around Intel compiler bug [\#5301](https://github.com/kokkos/kokkos/pull/5301) +- Avoid allocating memory for UniqueToken [\#5300](https://github.com/kokkos/kokkos/pull/5300) +- DynamicView: Properly resize mirror instances after construction [\#5276](https://github.com/kokkos/kokkos/pull/5276) +- Remove Kokkos::Rank limit of 6 ranks [\#5271](https://github.com/kokkos/kokkos/pull/5271) +- Do not forget to set last element to nullptr when removing a flag in `Kokkos::initialize` [\#5272](https://github.com/kokkos/kokkos/pull/5272) +- Fix CUDA+MSVC build issue [\#5261](https://github.com/kokkos/kokkos/pull/5261) +- Fix `DynamicView::resize_serial` [\#5220](https://github.com/kokkos/kokkos/pull/5220) +- Fix cmake default compiler flags for unknown compiler 
[\#5217](https://github.com/kokkos/kokkos/pull/5217) +- Fix `move_backward` [\#5191](https://github.com/kokkos/kokkos/pull/5191) +- Fixing issue 5196 - missing symbol with intel compiler [\#5207](https://github.com/kokkos/kokkos/pull/5207) +- Preserve `KOKKOS_INVALID_INDEX` in ViewDimension and ArrayLayout construction [\#5188](https://github.com/kokkos/kokkos/pull/5188) +- Finalize `deep_copy_space` early avoiding printing to `std::cerr` for Cuda [\#5151](https://github.com/kokkos/kokkos/pull/5151) +- Use correct policy in Threads MDRange `parallel_reduce` [\#5123](https://github.com/kokkos/kokkos/pull/5123) +- Fix building with NVCC as the CXX compiler while the CUDA backend is not enabled [\#5115](https://github.com/kokkos/kokkos/pull/5115) +- OpenMPTarget Index range fix for MDRange. [\#5089](https://github.com/kokkos/kokkos/pull/5089) +- Fix bug with CUDA's team reduction for empty ranges [\#5079](https://github.com/kokkos/kokkos/pull/5079) +- Fix using `ZeroMemset` for Serial [\#5077](https://github.com/kokkos/kokkos/pull/5077) +- Fix `Kokkos::Vector::push_back` for default execution space [\#5047](https://github.com/kokkos/kokkos/pull/5047) +- ScatterView: Fix ScatterMin/ScatterMax to use proper atomics [\#5045](https://github.com/kokkos/kokkos/pull/5045) +- Fix calling `ZeroMemset` in `deep_copy` [\#5040](https://github.com/kokkos/kokkos/pull/5040) +- Make View self-assignment not produce double-free [\#5024](https://github.com/kokkos/kokkos/pull/5024) +- Guard against unrecognized pragma with intel compilers [\#5019](https://github.com/kokkos/kokkos/pull/5019) +- Fix racing condition in `HIPParallelLaunch` [\#5008](https://github.com/kokkos/kokkos/pull/5008) +- KokkosP: Fix `device_id` in profiling [\#4997](https://github.com/kokkos/kokkos/pull/4997) +- Fix for `Kokkos::vector::insert` into empty vector with begin and end iterators [\#4988](https://github.com/kokkos/kokkos/pull/4988) +- Fix Core header files installation 
[\#4984](https://github.com/kokkos/kokkos/pull/4984) +- Fix bounds errors with `Kokkos::sort` [\#4980](https://github.com/kokkos/kokkos/pull/4980) +- Fixup let `RangePolicy::set_chunk_size` return a reference to self [\#4918](https://github.com/kokkos/kokkos/pull/4918) +- Fix allocating large Views [\#4907](https://github.com/kokkos/kokkos/pull/4907) +- Fix combined reductions with `Kokkos::View` [\#4896](https://github.com/kokkos/kokkos/pull/4896) +- Fixed `_CUDA_ARCH__` to `__CUDA_ARCH__` for CUDA LDG [\#4893](https://github.com/kokkos/kokkos/pull/4893) +- Fixup `View::access()` truncate parameter pack [\#4876](https://github.com/kokkos/kokkos/pull/4876) +- Fix `abort` with HIP backend for ROCm 5.0.2 and beyond [\#4873](https://github.com/kokkos/kokkos/pull/4873) +- Fix HIP version when printing the configuration [\#4872](https://github.com/kokkos/kokkos/pull/4872) +- Fix scratch lock array when using scratch level 1 [\#4871](https://github.com/kokkos/kokkos/pull/4871) +- Fix Makefile.kokkos to work with fujitsu compiler [\#4867](https://github.com/kokkos/kokkos/pull/4867) +- cmake: Correct link THREADS link option [\#4854](https://github.com/kokkos/kokkos/pull/4854) +- UniqueToken `impl_acquire` function should be device only [\#4819](https://github.com/kokkos/kokkos/pull/4819) +- Fix example calls to non existing static `print_configuration` [\#4806](https://github.com/kokkos/kokkos/pull/4806) +- Fix requests for large team scratch sizes [\#4728](https://github.com/kokkos/kokkos/pull/4728) + + ## [3.6.01](https://github.com/kokkos/kokkos/tree/3.6.01) (2022-05-23) [Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.00...3.6.01) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index b0a54118a0..a05bfcdb94 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -1,3 +1,4 @@ +cmake_minimum_required(VERSION 3.16 FATAL_ERROR) # Disable in-source builds to prevent source tree corruption. 
if( "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}" ) @@ -28,11 +29,6 @@ SET(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) SET(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) SET(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) -# Needed to simplify syntax of if statements -CMAKE_POLICY(SET CMP0054 NEW) -# Needed to make IN_LIST a valid operator -CMAKE_POLICY(SET CMP0057 NEW) - # Is this a build as part of Trilinos? IF(COMMAND TRIBITS_PACKAGE_DECL) SET(KOKKOS_HAS_TRILINOS ON) @@ -72,7 +68,6 @@ ENDFUNCTION() LIST(APPEND CMAKE_MODULE_PATH cmake/Modules) IF(NOT KOKKOS_HAS_TRILINOS) - cmake_minimum_required(VERSION 3.16 FATAL_ERROR) set(CMAKE_DISABLE_SOURCE_CHANGES ON) set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) @@ -80,7 +75,7 @@ IF(NOT KOKKOS_HAS_TRILINOS) # downstream dependencies need to match this! SET(KOKKOS_COMPILE_LANGUAGE CXX) # use lower case here since we didn't parse options yet - IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE) + IF (Kokkos_ENABLE_COMPILE_AS_CMAKE_LANGUAGE AND Kokkos_ENABLE_CUDA) # Without this as a language for the package we would get a C++ compiler enabled. # but we still need a C++ compiler even if we build all our cpp files as CUDA only @@ -90,9 +85,7 @@ IF(NOT KOKKOS_HAS_TRILINOS) # days. 
SET(KOKKOS_INTERNAL_EXTRA_COMPILE_LANGUAGE CXX) - IF (Kokkos_ENABLE_CUDA) - SET(KOKKOS_COMPILE_LANGUAGE CUDA) - ENDIF() + SET(KOKKOS_COMPILE_LANGUAGE CUDA) ENDIF() IF (Spack_WORKAROUND) @@ -135,14 +128,11 @@ ENDIF() set(Kokkos_VERSION_MAJOR 3) -set(Kokkos_VERSION_MINOR 6) -set(Kokkos_VERSION_PATCH 01) +set(Kokkos_VERSION_MINOR 7) +set(Kokkos_VERSION_PATCH 00) set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}") math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}") -MESSAGE(STATUS "Setting policy CMP0074 to use _ROOT variables") -CMAKE_POLICY(SET CMP0074 NEW) - # Load either the real TriBITS or a TriBITS wrapper # for certain utility functions that are universal (like GLOBAL_SET) INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) @@ -204,11 +194,16 @@ KOKKOS_SETUP_BUILD_ENVIRONMENT() OPTION(BUILD_SHARED_LIBS "Build shared libraries" OFF) SET(KOKKOS_EXT_LIBRARIES Kokkos::kokkos Kokkos::kokkoscore Kokkos::kokkoscontainers Kokkos::kokkosalgorithms) -SET(KOKKOS_INT_LIBRARIES kokkos kokkoscore kokkoscontainers kokkosalgorithms) +SET(KOKKOS_SUB_LIBRARIES kokkoscore kokkoscontainers kokkosalgorithms) +IF (KOKKOS_CXX_STANDARD GREATER_EQUAL 17) + LIST(APPEND KOKKOS_EXT_LIBRARIES Kokkos::kokkossimd) + LIST(APPEND KOKKOS_SUB_LIBRARIES kokkossimd) +ENDIF() +SET(KOKKOS_INT_LIBRARIES kokkos ${KOKKOS_SUB_LIBRARIES}) SET_PROPERTY(GLOBAL PROPERTY KOKKOS_INT_LIBRARIES ${KOKKOS_INT_LIBRARIES}) IF (KOKKOS_HAS_TRILINOS) - SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) + SET(TRILINOS_INCDIR ${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR}) SET(KOKKOS_IS_SUBDIRECTORY TRUE) ELSEIF(HAS_PARENT) @@ -296,7 +291,7 @@ IF (NOT KOKKOS_HAS_TRILINOS AND NOT Kokkos_INSTALL_TESTING) #Make sure in-tree projects can reference this as Kokkos:: #to match the installed target names ADD_LIBRARY(Kokkos::kokkos ALIAS kokkos) - 
TARGET_LINK_LIBRARIES(kokkos INTERFACE kokkoscore kokkoscontainers kokkosalgorithms) + TARGET_LINK_LIBRARIES(kokkos INTERFACE ${KOKKOS_SUB_LIBRARIES}) KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(kokkos) ENDIF() INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake) diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 755831452b..7aa04aad7d 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -11,21 +11,21 @@ CXXFLAGS += $(SHFLAGS) endif KOKKOS_VERSION_MAJOR = 3 -KOKKOS_VERSION_MINOR = 6 -KOKKOS_VERSION_PATCH = 01 +KOKKOS_VERSION_MINOR = 7 +KOKKOS_VERSION_PATCH = 00 KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) # Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial KOKKOS_DEVICES ?= "OpenMP" #KOKKOS_DEVICES ?= "Threads" # Options: -# Intel: KNC,KNL,SNB,HSW,BDW,SKX +# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX # IBM: BGQ,Power7,Power8,Power9 # AMD-GPUS: Vega900,Vega906,Vega908,Vega90A # AMD-CPUS: AMDAVX,Zen,Zen2,Zen3 -# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP +# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC KOKKOS_ARCH ?= "" # Options: yes,no KOKKOS_DEBUG ?= "no" @@ -193,6 +193,8 @@ KOKKOS_INTERNAL_COMPILER_INTEL_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VE KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple clang) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC) +# TODO fujitsu can emulate gcc or clang. Only clang mode works at the moment. 
+KOKKOS_INTERNAL_COMPILER_FUJITSU := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),FUJITSU) # Check Host Compiler if using NVCC through nvcc_wrapper ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) @@ -221,8 +223,23 @@ endif ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) KOKKOS_INTENAL_COMPILER_CLANG = 0 endif +# Fujitsu passes also as clang and gcc respectively +ifeq ($(KOKKOS_INTERNAL_COMPILER_FUJITSU), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1) + # TODO handle gcc flags and workaround for bug? + # fujitsu (gcc mode) is bugged, see https://github.com/kokkos/kokkos/issues/4730 + $(warning Warning: ${CXX} in Trad Mode '-Nnoclang' (default) is not recommended. Use 'CXX = ${CXX} -Nclang' instead.) + # HACK since fujitsu only accepts some gcc flags, disable gcc here? + # KOKKOS_INTERNAL_COMPILER_GCC = 0 + endif + # TODO handle clang flags + # warnings: works fine as is + # openmp: handled + #KOKKOS_INTERNAL_COMPILER_CLANG = 0 +endif ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + # TODO empty variable if fujitsu (clang mode) passes as clang KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell $(CXX) --version | grep version | cut -d ' ' -f3 | tr -d '.') ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) @@ -272,7 +289,12 @@ else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY_CLANG), 1) KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp else - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp + ifeq ($(KOKKOS_INTERNAL_COMPILER_FUJITSU), 1) + # fujitsu (clang mode) fails with `=libomp` + KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp + else + KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp + endif endif else ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) @@ -300,11 +322,15 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) #KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG 
-fopenmp -fopenmp=libomp + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG -fopenmp -fopenmp=libomp -Wno-openmp-mapping KOKKOS_INTERNAL_OPENMPTARGET_LIB := -lomptarget else - #Assume GCC - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL_CLANG), 1) + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fiopenmp -Wno-openmp-mapping + else + #Assume GCC + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none + endif endif endif @@ -344,8 +370,12 @@ KOKKOS_INTERNAL_USE_ARCH_WSM := $(call kokkos_has_string,$(KOKKOS_ARCH),WSM) KOKKOS_INTERNAL_USE_ARCH_SNB := $(call kokkos_has_string,$(KOKKOS_ARCH),SNB) KOKKOS_INTERNAL_USE_ARCH_HSW := $(call kokkos_has_string,$(KOKKOS_ARCH),HSW) KOKKOS_INTERNAL_USE_ARCH_BDW := $(call kokkos_has_string,$(KOKKOS_ARCH),BDW) +KOKKOS_INTERNAL_USE_ARCH_SKL := $(call kokkos_has_string,$(KOKKOS_ARCH),SKL) KOKKOS_INTERNAL_USE_ARCH_SKX := $(call kokkos_has_string,$(KOKKOS_ARCH),SKX) KOKKOS_INTERNAL_USE_ARCH_KNL := $(call kokkos_has_string,$(KOKKOS_ARCH),KNL) +KOKKOS_INTERNAL_USE_ARCH_ICL := $(call kokkos_has_string,$(KOKKOS_ARCH),ICL) +KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX) +KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR) KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen) KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9) @@ -353,6 +383,7 @@ KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH), KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP) KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1) KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP) +KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC) # NVIDIA based. 
NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper @@ -436,19 +467,9 @@ KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_W KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)) KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3)) KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNL)) -KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SKX)) - -# Decide what ISA level we are able to support. -KOKKOS_INTERNAL_USE_ISA_X86_64 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM) + $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_HSW) + $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_KNL) + $(KOKKOS_INTERNAL_USE_ARCH_SKX) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN2) + $(KOKKOS_INTERNAL_USE_ARCH_ZEN3)) -KOKKOS_INTERNAL_USE_ISA_KNC := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KNC)) -KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER8) + $(KOKKOS_INTERNAL_USE_ARCH_POWER9)) -KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_POWER7)) - -# Decide whether we can support transactional memory -KOKKOS_INTERNAL_USE_TM := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_BDW) + $(KOKKOS_INTERNAL_USE_ARCH_SKX)) # Incompatible flags? 
-KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc) +KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_SKL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX)+$(KOKKOS_INTERNAL_USE_ARCH_ICL)+$(KOKKOS_INTERNAL_USE_ARCH_ICX)+$(KOKKOS_INTERNAL_USE_ARCH_SPR)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1") | bc) KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1") | bc) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) @@ -551,36 +572,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_SERIAL") endif -ifeq ($(KOKKOS_INTERNAL_USE_TM), 1) - tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_TM") - tmp := $(call kokkos_append_header,"$H""endif") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) - tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_X86_64") - tmp := $(call kokkos_append_header,"$H""endif") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1) - tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_KNC") - tmp := $(call kokkos_append_header,"$H""endif") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1) - tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCLE") - tmp 
:= $(call kokkos_append_header,"$H""endif") -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1) - tmp := $(call kokkos_append_header,"$H""ifndef __CUDA_ARCH__") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_USE_ISA_POWERPCBE") - tmp := $(call kokkos_append_header,"$H""endif") -endif - #only add the c++ standard flags if this is not CMake tmp := $(call kokkos_append_header,"/* General Settings */") ifneq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 1) @@ -1041,7 +1032,28 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) endif endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SKL), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") + + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xSKYLAKE + KOKKOS_LDFLAGS += -xSKYLAKE + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Nothing here yet. + KOKKOS_CXXFLAGS += -march=skylake + KOKKOS_LDFLAGS += -march=skylake + endif + endif + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SKX), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) @@ -1055,13 +1067,31 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) else # Nothing here yet. 
- KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm - KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm + KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 + KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 endif endif endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ICL), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") + KOKKOS_CXXFLAGS += -march=icelake-client -mtune=icelake-client + KOKKOS_LDFLAGS += -march=icelake-client -mtune=icelake-client +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ICX), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") + KOKKOS_CXXFLAGS += -march=icelake-server -mtune=icelake-server + KOKKOS_LDFLAGS += -march=icelake-server -mtune=icelake-server +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SPR), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX512XEON") + KOKKOS_CXXFLAGS += -march=sapphirerapids -mtune=sapphirerapids + KOKKOS_LDFLAGS += -march=sapphirerapids -mtune=sapphirerapids +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_KNC") KOKKOS_CXXFLAGS += -mmic @@ -1091,7 +1121,7 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march + KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64 -Xopenmp-target -march endif KOKKOS_INTERNAL_USE_CUDA_ARCH = 1 endif @@ -1192,29 +1222,29 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA900") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900 + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx900 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) tmp := $(call kokkos_append_header,"$H""define 
KOKKOS_ARCH_VEGA906") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906 + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx906 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA908), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA908") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx908 + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx908 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA90A), 1) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA90A") tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx90a + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx90a endif KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp) ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) - KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp + KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp endif KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) @@ -1230,51 +1260,67 @@ ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) endif endif -# Figure out the architecture flag for SYCL. +# Figure out Intel architecture flags. 
ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) - # Lets start with adding architecture defines - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN") - KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9-" - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN9") - KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen9" - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN11") - KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen11" - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN12LP") - KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device gen12lp" - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_DG1") - KOKKOS_INTERNAL_SYCL_ARCH_FLAG := -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device dg1" - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 1) - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") - tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_XEHP") - KOKKOS_INTERNAL_SYCL_ARCH_FLAG := 
-fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend "-device xehp" - endif + KOKKOS_INTERNAL_LC_BACKEND := sycl +endif +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + KOKKOS_INTERNAL_LC_BACKEND := openmp +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN9") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen9" +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN11") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen11" +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GEN12LP") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device gen12lp" +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_DG1") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device dg1" +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 1) + 
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_XEHP") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device xehp" +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC), 1) + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_GPU") + tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_INTEL_PVC") + KOKKOS_INTERNAL_INTEL_ARCH_FLAG := -f${KOKKOS_INTERNAL_LC_BACKEND}-targets=spir64_gen -X${KOKKOS_INTERNAL_LC_BACKEND}-target-backend "-device 12.4.0" +endif +ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/SYCL/*.hpp) - KOKKOS_CXXFLAGS+=-fsycl -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda - KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_SYCL_ARCH_FLAG) + KOKKOS_CXXFLAGS+=-fsycl -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda -fsycl-dead-args-optimization + KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) KOKKOS_LDFLAGS+=-fsycl - KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_SYCL_ARCH_FLAG) + KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) -D__STRICT_ANSI__ + KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_INTEL_ARCH_FLAG) endif ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_DESUL_ATOMICS") + KOKKOS_CPPFLAGS+=-I$(KOKKOS_PATH)/tpls/desul/include else ifeq ($(KOKKOS_INTERNAL_ENABLE_DESUL_ATOMICS), 1) $(error Contradictory Desul atomics options: KOKKOS_OPTIONS=$(KOKKOS_OPTIONS) ) @@ -1359,7 +1405,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) ifeq ($(KOKKOS_INTERNAL_DISABLE_DESUL_ATOMICS), 0) - 
KOKKOS_SRC += $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp + KOKKOS_SRC += $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) ifneq ($(CUDA_PATH),) @@ -1417,6 +1463,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) KOKKOS_TPL_LIBRARY_NAMES += pthread endif +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Serial/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Serial/*.hpp) +endif + ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp) @@ -1449,15 +1500,6 @@ ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC)) endif -# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial -# device to avoid a link warning. -ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) -endif -ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC)) - KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp,$(KOKKOS_SRC)) -endif - # With Cygwin functions such as fdopen and fileno are not defined # when strict ansi is enabled. strict ansi gets enabled with -std=c++14 # though. So we hard undefine it here. 
Not sure if that has any bad side effects diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index a9cb12e1b4..876726e947 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -16,10 +16,6 @@ Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Ho $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp -Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp -Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp @@ -41,6 +37,13 @@ Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/ Kokkos_NumericTraits.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_NumericTraits.cpp +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) +Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial.cpp +Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp + $(CXX) 
$(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Serial/Kokkos_Serial_Task.cpp +endif + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp @@ -50,8 +53,8 @@ Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp -Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_CUDA.cpp +Lock_Array_CUDA.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_CUDA.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_SYCL), 1) @@ -70,20 +73,18 @@ Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp Kokkos_HIP_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp -Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/desul/src/Lock_Array_HIP.cpp +Lock_Array_HIP.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp + $(CXX) $(KOKKOS_CPPFLAGS) 
$(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/tpls/desul/src/Lock_Array_HIP.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1) -Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) -Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp +Kokkos_OpenMP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Instance.cpp Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp endif diff --git a/lib/kokkos/README.md b/lib/kokkos/README.md index f6c500cc1a..033346e956 100644 --- a/lib/kokkos/README.md +++ b/lib/kokkos/README.md @@ -10,270 +10,48 @@ hierarchies and multiple types of execution resources. It currently can use CUDA, HIP, SYCL, HPX, OpenMP and C++ threads as backend programming models with several other backends in development. -Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem, -which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as -profiling and debugging tools (https://github.com/kokkos/kokkos-tools). 
+**Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem.** + +For the complete documentation, click below: + +# [kokkos.github.io/kokkos-core-wiki](https://kokkos.github.io/kokkos-core-wiki) # Learning about Kokkos -The best way to start learning about Kokkos is going through the Kokkos Lectures. -They are online available at https://kokkos.link/the-lectures and contain a mix -of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem -capabilities. +To start learning about Kokkos: -A programming guide and API reference can be found on the Wiki -(https://github.com/kokkos/kokkos/wiki). +- [Kokkos Lectures](https://kokkos.github.io/kokkos-core-wiki/videolectures.html): they contain a mix of lecture videos and hands-on exercises covering all the important Kokkos Ecosystem capabilities. + +- [Programming guide](https://kokkos.github.io/kokkos-core-wiki/programmingguide.html): contains in "narrative" form a technical description of the programming model, machine model, and the main building blocks like the Views and parallel dispatch. + +- [API reference](https://kokkos.github.io/kokkos-core-wiki/): organized by category, i.e., [core](https://kokkos.github.io/kokkos-core-wiki/API/core-index.html), [algorithms](https://kokkos.github.io/kokkos-core-wiki/API/algorithms-index.html) and [containers](https://kokkos.github.io/kokkos-core-wiki/API/containers-index.html) or, if you prefer, in [alphabetical order](https://kokkos.github.io/kokkos-core-wiki/API/alphabetical.html). + +- [Use cases and Examples](https://kokkos.github.io/kokkos-core-wiki/usecases.html): a series of examples ranging from how to use Kokkos with MPI to Fortran interoperability. For questions find us on Slack: https://kokkosteam.slack.com or open a github issue. 
-For non-public questions send an email to -crtrott(at)sandia.gov +For non-public questions send an email to: *crtrott(at)sandia.gov* # Contributing to Kokkos -We are open and try to encourage contributions from external developers. -To do so please first open an issue describing the contribution and then issue -a pull request against the develop branch. For larger features it may be good -to get guidance from the core development team first through the github issue. +Please see [this page](https://kokkos.github.io/kokkos-core-wiki/contributing.html) for details on how to contribute. -Note that Kokkos Core is licensed under standard 3-clause BSD terms of use. -Which means contributing to Kokkos allows anyone else to use your contributions -not just for public purposes but also for closed source commercial projects. -For specifics see the LICENSE file contained in the repository or distribution. +# Requirements, Building and Installing -# Requirements - -### Minimum Compiler Versions - -Generally Kokkos should work with all compiler versions newer than the minimum. -However as in all sufficiently complex enough code, we have to work around compiler -bugs with almost all compilers. So compiler versions we don't test may have issues -we are unaware of. - -* GCC: 5.3.0 -* Clang: 4.0.0 -* Intel: 17.0.1 -* NVCC: 9.2.88 -* NVC++: 21.5 -* ROCm: 4.3 -* MSVC: 19.29 -* IBM XL: 16.1.1 -* Fujitsu: 4.5.0 -* ARM/Clang 20.1 - -### Primary Tested Compilers - -* GCC: 5.3.0, 6.1.0, 7.3.0, 8.3, 9.2, 10.0 -* NVCC: 9.2.88, 10.1, 11.0 -* Clang: 8.0.0, 9.0.0, 10.0.0, 12.0.0 -* Intel 17.4, 18.1, 19.5 -* MSVC: 19.29 -* ARM/Clang: 20.1 -* IBM XL: 16.1.1 -* ROCm: 4.3.0 - -### Build system: - -* CMake >= 3.16: required -* CMake >= 3.18: Fortran linkage. This does not affect most mixed Fortran/Kokkos builds. See [build issues](BUILD.md#KnownIssues). -* CMake >= 3.21.1 for NVC++ - -Primary tested compiler are passing in release mode -with warnings as errors. 
They also are tested with a comprehensive set of -backend combinations (i.e. OpenMP, Threads, Serial, OpenMP+Serial, ...). -We are using the following set of flags: -* GCC: - ```` - -Wall -Wunused-parameter -Wshadow -pedantic - -Werror -Wsign-compare -Wtype-limits - -Wignored-qualifiers -Wempty-body - -Wclobbered -Wuninitialized - ```` -* Intel: - ```` - -Wall -Wunused-parameter -Wshadow -pedantic - -Werror -Wsign-compare -Wtype-limits - -Wuninitialized - ```` -* Clang: - ```` - -Wall -Wunused-parameter -Wshadow -pedantic - -Werror -Wsign-compare -Wtype-limits - -Wuninitialized - ```` - -* NVCC: - ```` - -Wall -Wunused-parameter -Wshadow -pedantic - -Werror -Wsign-compare -Wtype-limits - -Wuninitialized - ```` - -Other compilers are tested occasionally, in particular when pushing from develop to -master branch. These are tested less rigorously without `-Werror` and only for a select set of backends. - -# Building and Installing Kokkos -Kokkos provide a CMake build system and a raw Makefile build system. -The CMake build system is strongly encouraged and will be the most rigorously supported in future releases. -Full details are given in the [build instructions](BUILD.md). Basic setups are shown here: - -## CMake - -The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`: -````bash -cmake $srcdir \ - -DCMAKE_CXX_COMPILER=$path_to_compiler \ - -DCMAKE_INSTALL_PREFIX=$path_to_install \ - -DKokkos_ENABLE_OPENMP=On \ - -DKokkos_ARCH_HSW=On \ - -DKokkos_HWLOC_DIR=$path_to_hwloc -```` -then simply type `make install`. The Kokkos CMake package will then be installed in `$path_to_install` to be used by downstream packages. - -To validate the Kokkos build, configure with -```` - -DKokkos_ENABLE_TESTS=On -```` -and run `make test` after completing the build. - -For your CMake project using Kokkos, code such as the following: - -````cmake -find_package(Kokkos) -... 
-target_link_libraries(myTarget Kokkos::kokkos) -```` -should be added to your CMakeLists.txt. Your configure should additionally include -```` --DKokkos_DIR=$path_to_install/cmake/lib/Kokkos -```` -or -```` --DKokkos_ROOT=$path_to_install -```` -for the install location given above. - -## Spack -An alternative to manually building with the CMake is to use the Spack package manager. -To get started, download the Spack [repo](https://github.com/spack/spack). -```` -A basic installation would be done as: -````bash -> spack install kokkos -```` -Spack allows options and and compilers to be tuned in the install command. -````bash -> spack install kokkos@3.0 %gcc@7.3.0 +openmp -```` -This example illustrates the three most common parameters to Spack: -* Variants: specified with, e.g. `+openmp`, this activates (or deactivates with, e.g. `~openmp`) certain options. -* Version: immediately following `kokkos` the `@version` can specify a particular Kokkos to build -* Compiler: a default compiler will be chosen if not specified, but an exact compiler version can be given with the `%`option. - -For a complete list of Kokkos options, run: -````bash -> spack info kokkos -```` -Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable". -Generally, Spack usage should never really require you to reference the computer-generated unique install folder. -More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with: -````bash -> spack find -p kokkos ... -```` -where `...` is the unique spec identifying the particular Kokkos configuration and version. -Some more details can found in the Kokkos spack [documentation](Spack.md) or the Spack [website](https://spack.readthedocs.io/en/latest). - -## Raw Makefile - -Raw Makefiles are only supported via inline builds. See below. - -## Inline Builds vs. 
Installed Package -For individual projects, it may be preferable to build Kokkos inline rather than link to an installed package. -The main reason is that you may otherwise need many different -configurations of Kokkos installed depending on the required compile time -features an application needs. For example there is only one default -execution space, which means you need different installations to have OpenMP -or C++ threads as the default space. Also for the CUDA backend there are certain -choices, such as allowing relocatable device code, which must be made at -installation time. Building Kokkos inline uses largely the same process -as compiling an application against an installed Kokkos library. - -For CMake, this means copying over the Kokkos source code into your project and adding `add_subdirectory(kokkos)` to your CMakeLists.txt. - -For raw Makefiles, see the example benchmarks/bytes_and_flops/Makefile which can be used with an installed library and or an inline build. - -# Kokkos and CUDA UVM - -Kokkos does support UVM as a specific memory space called CudaUVMSpace. -Allocations made with that space are accessible from host and device. -You can tell Kokkos to use that as the default space for Cuda allocations. -In either case UVM comes with a number of restrictions: -* You can't access allocations on the host while a kernel is potentially -running. This will lead to segfaults. To avoid that you either need to -call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or -you can set the environment variable CUDA_LAUNCH_BLOCKING=1. -* In multi socket multi GPU machines without NVLINK, UVM defaults -to using zero copy allocations for technical reasons related to using multiple -GPUs from the same process. If an executable doesn't do that (e.g. each -MPI rank of an application uses a single GPU [can be the same GPU for -multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1. 
-This will enforce proper UVM allocations, but can lead to errors if -more than a single GPU is used by a single process. +All requirements including minimum and primary tested compiler versions can be found [here](https://kokkos.github.io/kokkos-core-wiki/requirements.html). +Building and installation instructions are described [here](https://kokkos.github.io/kokkos-core-wiki/building.html). # Citing Kokkos -If you publish work which mentions Kokkos, please cite the following paper: +Please see the [following page](https://kokkos.github.io/kokkos-core-wiki/citation.html). -````BibTex -@ARTICLE{9485033, - author={Trott, Christian R. and Lebrun-Grandié, Damien and Arndt, Daniel and Ciesko, Jan and Dang, Vinh and Ellingwood, Nathan and Gayatri, Rahulkumar and Harvey, Evan and Hollman, Daisy S. and Ibanez, Dan and Liber, Nevin and Madsen, Jonathan and Miles, Jeff and Poliakoff, David and Powell, Amy and Rajamanickam, Sivasankaran and Simberg, Mikael and Sunderland, Dan and Turcksin, Bruno and Wilke, Jeremiah}, - journal={IEEE Transactions on Parallel and Distributed Systems}, - title={Kokkos 3: Programming Model Extensions for the Exascale Era}, - year={2022}, - volume={33}, - number={4}, - pages={805-817}, - doi={10.1109/TPDS.2021.3097283}} -```` - -If you use more than one Kokkos EcoSystem package, please also cite: - -````BibTex -@ARTICLE{9502936, - author={Trott, Christian and Berger-Vergiat, Luc and Poliakoff, David and Rajamanickam, Sivasankaran and Lebrun-Grandie, Damien and Madsen, Jonathan and Al Awar, Nader and Gligoric, Milos and Shipman, Galen and Womeldorff, Geoff}, - journal={Computing in Science Engineering}, - title={The Kokkos EcoSystem: Comprehensive Performance Portability for High Performance Computing}, - year={2021}, - volume={23}, - number={5}, - pages={10-18}, - doi={10.1109/MCSE.2021.3098509}} -```` - - -And if you feel generous: feel free to cite the original Kokkos paper which describes most of the basic Kokkos concepts: - -````BibTeX 
-@article{CarterEdwards20143202, - title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ", - journal = "Journal of Parallel and Distributed Computing ", - volume = "74", - number = "12", - pages = "3202 - 3216", - year = "2014", - note = "Domain-Specific Languages and High-Level Frameworks for High-Performance Computing ", - issn = "0743-7315", - doi = "https://doi.org/10.1016/j.jpdc.2014.07.003", - url = "http://www.sciencedirect.com/science/article/pii/S0743731514001257", - author = "H. Carter Edwards and Christian R. Trott and Daniel Sunderland" -} -```` - -##### [LICENSE](https://github.com/kokkos/kokkos/blob/master/LICENSE) +# License [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains certain rights in this software. +The full license statement used in all headers is available [here](https://kokkos.github.io/kokkos-core-wiki/license.html) or +[here](https://github.com/kokkos/kokkos/blob/master/LICENSE). 
diff --git a/lib/kokkos/algorithms/src/CMakeLists.txt b/lib/kokkos/algorithms/src/CMakeLists.txt index 4b60d887f7..597626b111 100644 --- a/lib/kokkos/algorithms/src/CMakeLists.txt +++ b/lib/kokkos/algorithms/src/CMakeLists.txt @@ -11,6 +11,7 @@ FILE(GLOB ALGO_HEADERS *.hpp) FILE(GLOB ALGO_SOURCES *.cpp) LIST(APPEND ALGO_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/*.hpp) +APPEND_GLOB(ALGO_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/std_algorithms/impl/*.hpp) INSTALL ( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index 59c11afd9a..1d85ffdfb9 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_RANDOM_HPP #define KOKKOS_RANDOM_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM +#endif #include #include @@ -648,63 +652,44 @@ struct Random_UniqueIndex { } }; -#ifdef KOKKOS_ENABLE_CUDA -template -struct Random_UniqueIndex> { - using locks_view_type = - View>; - KOKKOS_FUNCTION - static int get_state_idx(const locks_view_type& locks_) { -#ifdef __CUDA_ARCH__ - const int i_offset = - (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; - int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * - blockDim.x * blockDim.y * blockDim.z + - i_offset) % - locks_.extent(0); - while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) { - i += blockDim.x * blockDim.y * blockDim.z; - if (i >= static_cast(locks_.extent(0))) { - i = i_offset; - } - } - return i; -#else - (void)locks_; - return 0; -#endif - } -}; +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) + +#if defined(KOKKOS_ENABLE_CUDA) +#define KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP Kokkos::Cuda +#elif defined(KOKKOS_ENABLE_HIP) 
+#define KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP Kokkos::Experimental::HIP #endif -#ifdef KOKKOS_ENABLE_HIP template struct Random_UniqueIndex< - Kokkos::Device> { + Kokkos::Device> { using locks_view_type = - View>; + View>; KOKKOS_FUNCTION static int get_state_idx(const locks_view_type& locks_) { -#ifdef __HIP_DEVICE_COMPILE__ - const int i_offset = - (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; - int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * + KOKKOS_IF_ON_DEVICE(( + const int i_offset = + (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; + int i = + (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * blockDim.x * blockDim.y * blockDim.z + i_offset) % locks_.extent(0); - while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) { - i += blockDim.x * blockDim.y * blockDim.z; - if (i >= static_cast(locks_.extent(0))) { - i = i_offset; - } - } - return i; -#else - (void)locks_; - return 0; -#endif + while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) { + i += blockDim.x * blockDim.y * blockDim.z; + if (i >= static_cast(locks_.extent(0))) { + i = i_offset; + } + } + + return i;)) + KOKKOS_IF_ON_HOST(((void)locks_; return 0;)) } }; + +#undef KOKKOS_IMPL_EXECUTION_SPACE_CUDA_OR_HIP + #endif #ifdef KOKKOS_ENABLE_SYCL @@ -1279,7 +1264,6 @@ struct fill_random_functor_begin_end; template struct fill_random_functor_begin_end { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1303,7 +1287,6 @@ struct fill_random_functor_begin_end struct fill_random_functor_begin_end { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1331,7 +1314,6 @@ struct fill_random_functor_begin_end struct fill_random_functor_begin_end { - using execution_space = typename ViewType::execution_space; ViewType a; 
RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1361,7 +1343,6 @@ struct fill_random_functor_begin_end struct fill_random_functor_begin_end { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1392,7 +1373,6 @@ struct fill_random_functor_begin_end struct fill_random_functor_begin_end { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1424,7 +1404,6 @@ struct fill_random_functor_begin_end struct fill_random_functor_begin_end { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1458,7 +1437,6 @@ struct fill_random_functor_begin_end struct fill_random_functor_begin_end { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1494,7 +1472,6 @@ struct fill_random_functor_begin_end struct fill_random_functor_begin_end { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1532,7 +1509,6 @@ struct fill_random_functor_begin_end struct fill_random_functor_begin_end { - using execution_space = typename ViewType::execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type begin, end; @@ -1569,34 +1545,57 @@ struct fill_random_functor_begin_end -void fill_random(ViewType a, RandomPool g, +template +void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g, typename ViewType::const_value_type begin, typename ViewType::const_value_type end) { int64_t LDA = a.extent(0); if (LDA > 0) - parallel_for("Kokkos::fill_random", (LDA + 127) / 128, - Impl::fill_random_functor_begin_end( - a, g, begin, end)); + parallel_for( + "Kokkos::fill_random", + 
Kokkos::RangePolicy(exec, 0, (LDA + 127) / 128), + Impl::fill_random_functor_begin_end( + a, g, begin, end)); } } // namespace Impl +template +void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g, + typename ViewType::const_value_type begin, + typename ViewType::const_value_type end) { + Impl::apply_to_view_of_static_rank( + [&](auto dst) { Kokkos::Impl::fill_random(exec, dst, g, begin, end); }, + a); +} + +template +void fill_random(const ExecutionSpace& exec, ViewType a, RandomPool g, + typename ViewType::const_value_type range) { + fill_random(exec, a, g, 0, range); +} + template void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin, typename ViewType::const_value_type end) { - Impl::apply_to_view_of_static_rank( - [&](auto dst) { Kokkos::Impl::fill_random(dst, g, begin, end); }, a); + fill_random(typename ViewType::execution_space{}, a, g, begin, end); } template void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) { - fill_random(a, g, 0, range); + fill_random(typename ViewType::execution_space{}, a, g, 0, range); } } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_RANDOM +#endif #endif diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index ce97de9b7d..ad0c2d47b6 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -44,6 +44,10 @@ #ifndef KOKKOS_SORT_HPP_ #define KOKKOS_SORT_HPP_ +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT +#endif #include @@ -120,13 +124,13 @@ class BinSort { // If a Kokkos::View then can generate constant random access // otherwise can only use the constant type. 
- using src_view_type = typename std::conditional< + using src_view_type = std::conditional_t< Kokkos::is_view::value, Kokkos::View >, - typename SrcViewType::const_type>::type; + typename SrcViewType::const_type>; using perm_view_type = typename PermuteViewType::const_type; @@ -151,8 +155,11 @@ class BinSort { } }; - using execution_space = typename Space::execution_space; - using bin_op_type = BinSortOp; + // Naming this alias "execution_space" would be problematic since it would be + // considered as execution space for the various functors which might use + // another execution space through sort() or create_permute_vector(). + using exec_space = typename Space::execution_space; + using bin_op_type = BinSortOp; struct bin_count_tag {}; struct bin_offset_tag {}; @@ -171,13 +178,13 @@ class BinSort { // If a Kokkos::View then can generate constant random access // otherwise can only use the constant type. - using const_rnd_key_view_type = typename std::conditional< + using const_rnd_key_view_type = std::conditional_t< Kokkos::is_view::value, Kokkos::View >, - const_key_view_type>::type; + const_key_view_type>; using non_const_key_scalar = typename KeyViewType::non_const_value_type; using const_key_scalar = typename KeyViewType::const_value_type; @@ -220,6 +227,14 @@ class BinSort { range_begin(range_begin_), range_end(range_end_), sort_within_bins(sort_within_bins_) { + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + if (bin_op.max_bins() <= 0) + Kokkos::abort( + "The number of bins in the BinSortOp object must be greater than 0!"); bin_count_atomic = Kokkos::View( "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); bin_count_const = bin_count_atomic; @@ -235,7 +250,7 @@ class BinSort { BinSort(const_key_view_type keys_, int range_begin_, int range_end_, BinSortOp bin_op_, bool sort_within_bins_ = false) - : 
BinSort(execution_space{}, keys_, range_begin_, range_end_, bin_op_, + : BinSort(exec_space{}, keys_, range_begin_, range_end_, bin_op_, sort_within_bins_) {} template @@ -245,13 +260,19 @@ class BinSort { BinSort(const_key_view_type keys_, BinSortOp bin_op_, bool sort_within_bins_ = false) - : BinSort(execution_space{}, keys_, bin_op_, sort_within_bins_) {} + : BinSort(exec_space{}, keys_, bin_op_, sort_within_bins_) {} //---------------------------------------- // Create the permutation vector, the bin_offset array and the bin_count // array. Can be called again if keys changed - template - void create_permute_vector(const ExecutionSpace& exec = execution_space{}) { + template + void create_permute_vector(const ExecutionSpace& exec = exec_space{}) { + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + const size_t len = range_end - range_begin; Kokkos::parallel_for( "Kokkos::Sort::BinCount", @@ -281,6 +302,17 @@ class BinSort { template void sort(const ExecutionSpace& exec, ValuesViewType const& values, int values_range_begin, int values_range_end) const { + static_assert( + Kokkos::SpaceAccessibility::accessible, + "The provided execution space must be able to access the memory space " + "BinSort was initialized with!"); + static_assert( + Kokkos::SpaceAccessibility< + ExecutionSpace, typename ValuesViewType::memory_space>::accessible, + "The provided execution space must be able to access the memory space " + "of the View argument!"); + using scratch_view_type = Kokkos::View void sort(ValuesViewType const& values, int values_range_begin, int values_range_end) const { - execution_space exec; + exec_space exec; sort(exec, values, values_range_begin, values_range_end); exec.fence("Kokkos::Sort: fence after sorting"); } @@ -428,7 +460,7 @@ struct BinOp1D { BinOp1D() = default; - // Construct BinOp with number of bins, minimum value and maxuimum 
value + // Construct BinOp with number of bins, minimum value and maximum value BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, typename KeyViewType::const_value_type max) : max_bins_(max_bins__ + 1), @@ -554,11 +586,7 @@ struct min_max_functor { template std::enable_if_t::value> sort( - const ExecutionSpace& exec, ViewType const& view, - bool const always_use_kokkos_sort = false) { - if (!always_use_kokkos_sort) { - if (Impl::try_std_sort(view, exec)) return; - } + const ExecutionSpace& exec, ViewType const& view) { using CompType = BinOp1D; Kokkos::MinMaxScalar result; @@ -596,12 +624,38 @@ std::enable_if_t::value> sort( bin_sort.sort(exec, view); } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +template +KOKKOS_DEPRECATED_WITH_COMMENT( + "Use the overload not taking bool always_use_kokkos_sort") +std::enable_if_t::value> sort( + const ExecutionSpace& exec, ViewType const& view, + bool const always_use_kokkos_sort) { + if (!always_use_kokkos_sort && Impl::try_std_sort(view, exec)) { + return; + } else { + sort(exec, view); + } +} +#endif + template -void sort(ViewType const& view, bool const always_use_kokkos_sort = false) { +void sort(ViewType const& view) { + typename ViewType::execution_space exec; + sort(exec, view); + exec.fence("Kokkos::Sort: fence after sorting"); +} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3 +template +KOKKOS_DEPRECATED_WITH_COMMENT( + "Use the overload not taking bool always_use_kokkos_sort") +void sort(ViewType const& view, bool const always_use_kokkos_sort) { typename ViewType::execution_space exec; sort(exec, view, always_use_kokkos_sort); exec.fence("Kokkos::Sort: fence after sorting"); } +#endif template std::enable_if_t::value> sort( @@ -635,4 +689,8 @@ void sort(ViewType view, size_t const begin, size_t const end) { } // namespace Kokkos +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_SORT +#endif #endif diff --git 
a/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp b/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp index 2e3babbcf0..3e0f731cf0 100644 --- a/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_StdAlgorithms.hpp @@ -44,59 +44,103 @@ #ifndef KOKKOS_STD_ALGORITHMS_HPP #define KOKKOS_STD_ALGORITHMS_HPP +#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE +#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS +#endif /// \file Kokkos_StdAlgorithms.hpp /// \brief Kokkos counterparts for Standard C++ Library algorithms -#include -#include -#include +#include "std_algorithms/impl/Kokkos_Constraints.hpp" +#include "std_algorithms/impl/Kokkos_RandomAccessIterator.hpp" +#include "std_algorithms/Kokkos_BeginEnd.hpp" // distance -#include +#include "std_algorithms/Kokkos_Distance.hpp" -// move, swap, iter_swap -#include "std_algorithms/Kokkos_ModifyingOperations.hpp" +// note that we categorize below the headers +// following the std classification. 
-// find, find_if, find_if_not -// for_each, for_each_n -// mismatch -// equal -// count_if, count -// all_of, any_of, none_of -// adjacent_find -// lexicographical_compare -// search, search_n -// find_first_of, find_end -#include +// modifying ops +#include "std_algorithms/Kokkos_Swap.hpp" +#include "std_algorithms/Kokkos_IterSwap.hpp" -// replace, replace_copy_if, replace_copy, replace_if -// copy, copy_n, copy_backward, copy_if -// fill, fill_n -// transform -// generate, generate_n -// reverse, reverse_copy -// move, move_backward -// swap_ranges -// unique, unique_copy -// rotate, rotate_copy -// remove, remove_if, remove_copy, remove_copy_if -// shift_left, shift_right -#include +// non-modifying sequence +#include "std_algorithms/Kokkos_AdjacentFind.hpp" +#include "std_algorithms/Kokkos_Count.hpp" +#include "std_algorithms/Kokkos_CountIf.hpp" +#include "std_algorithms/Kokkos_AllOf.hpp" +#include "std_algorithms/Kokkos_AnyOf.hpp" +#include "std_algorithms/Kokkos_NoneOf.hpp" +#include "std_algorithms/Kokkos_Equal.hpp" +#include "std_algorithms/Kokkos_Find.hpp" +#include "std_algorithms/Kokkos_FindIf.hpp" +#include "std_algorithms/Kokkos_FindIfNot.hpp" +#include "std_algorithms/Kokkos_FindEnd.hpp" +#include "std_algorithms/Kokkos_FindFirstOf.hpp" +#include "std_algorithms/Kokkos_ForEach.hpp" +#include "std_algorithms/Kokkos_ForEachN.hpp" +#include "std_algorithms/Kokkos_LexicographicalCompare.hpp" +#include "std_algorithms/Kokkos_Mismatch.hpp" +#include "std_algorithms/Kokkos_Search.hpp" +#include "std_algorithms/Kokkos_SearchN.hpp" -// is_sorted_until, is_sorted -#include +// modifying sequence +#include "std_algorithms/Kokkos_Fill.hpp" +#include "std_algorithms/Kokkos_FillN.hpp" +#include "std_algorithms/Kokkos_Replace.hpp" +#include "std_algorithms/Kokkos_ReplaceIf.hpp" +#include "std_algorithms/Kokkos_ReplaceCopyIf.hpp" +#include "std_algorithms/Kokkos_ReplaceCopy.hpp" +#include "std_algorithms/Kokkos_Copy.hpp" +#include "std_algorithms/Kokkos_CopyN.hpp" 
+#include "std_algorithms/Kokkos_CopyBackward.hpp" +#include "std_algorithms/Kokkos_CopyIf.hpp" +#include "std_algorithms/Kokkos_Transform.hpp" +#include "std_algorithms/Kokkos_Generate.hpp" +#include "std_algorithms/Kokkos_GenerateN.hpp" +#include "std_algorithms/Kokkos_Reverse.hpp" +#include "std_algorithms/Kokkos_ReverseCopy.hpp" +#include "std_algorithms/Kokkos_Move.hpp" +#include "std_algorithms/Kokkos_MoveBackward.hpp" +#include "std_algorithms/Kokkos_SwapRanges.hpp" +#include "std_algorithms/Kokkos_Unique.hpp" +#include "std_algorithms/Kokkos_UniqueCopy.hpp" +#include "std_algorithms/Kokkos_Rotate.hpp" +#include "std_algorithms/Kokkos_RotateCopy.hpp" +#include "std_algorithms/Kokkos_Remove.hpp" +#include "std_algorithms/Kokkos_RemoveIf.hpp" +#include "std_algorithms/Kokkos_RemoveCopy.hpp" +#include "std_algorithms/Kokkos_RemoveCopyIf.hpp" +#include "std_algorithms/Kokkos_ShiftLeft.hpp" +#include "std_algorithms/Kokkos_ShiftRight.hpp" -// min_element, max_element, minmax_element -#include +// sorting +#include "std_algorithms/Kokkos_IsSortedUntil.hpp" +#include "std_algorithms/Kokkos_IsSorted.hpp" -// is_partitioned, partition_copy, partition_point -#include +// min/max element +#include "std_algorithms/Kokkos_MinElement.hpp" +#include "std_algorithms/Kokkos_MaxElement.hpp" +#include "std_algorithms/Kokkos_MinMaxElement.hpp" -// adjacent_difference -// reduce, transform_reduce -// exclusive_scan, transform_exclusive_scan -// inclusive_scan, transform_inclusive_scan -#include +// partitioning +#include "std_algorithms/Kokkos_IsPartitioned.hpp" +#include "std_algorithms/Kokkos_PartitionCopy.hpp" +#include "std_algorithms/Kokkos_PartitionPoint.hpp" +// numeric +#include "std_algorithms/Kokkos_AdjacentDifference.hpp" +#include "std_algorithms/Kokkos_Reduce.hpp" +#include "std_algorithms/Kokkos_TransformReduce.hpp" +#include "std_algorithms/Kokkos_ExclusiveScan.hpp" +#include "std_algorithms/Kokkos_TransformExclusiveScan.hpp" +#include 
"std_algorithms/Kokkos_InclusiveScan.hpp" +#include "std_algorithms/Kokkos_TransformInclusiveScan.hpp" + +#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS +#undef KOKKOS_IMPL_PUBLIC_INCLUDE +#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_STD_ALGORITHMS +#endif #endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_AdjacentDifference.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp similarity index 72% rename from lib/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_AdjacentDifference.hpp rename to lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp index 03e5fd6aeb..0a7cf06f5b 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/numeric/Kokkos_AdjacentDifference.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentDifference.hpp @@ -42,106 +42,15 @@ //@HEADER */ -#ifndef KOKKOS_STD_NUMERICS_ADJACENT_DIFFERENCE_HPP -#define KOKKOS_STD_NUMERICS_ADJACENT_DIFFERENCE_HPP +#ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_HPP +#define KOKKOS_STD_ALGORITHMS_ADJACENT_DIFFERENCE_HPP -#include -#include "../Kokkos_BeginEnd.hpp" -#include "../Kokkos_Constraints.hpp" -#include "../Kokkos_Distance.hpp" +#include "impl/Kokkos_AdjacentDifference.hpp" +#include "Kokkos_BeginEnd.hpp" namespace Kokkos { namespace Experimental { -namespace Impl { -// ------------------------ -// -// functors -// -// ------------------------ -template -struct StdAdjacentDifferenceDefaultBinaryOpFunctor { - KOKKOS_FUNCTION - constexpr RetType operator()(const ValueType1& a, const ValueType2& b) const { - return a - b; - } -}; - -template -struct StdAdjacentDiffFunctor { - using index_type = typename InputIteratorType::difference_type; - - const InputIteratorType m_first_from; - const OutputIteratorType m_first_dest; - BinaryOperator m_op; - - KOKKOS_FUNCTION - void operator()(const index_type i) const { - const auto& my_value = m_first_from[i]; - if (i == 0) { - m_first_dest[i] = my_value; - } else { - const 
auto& left_value = m_first_from[i - 1]; - m_first_dest[i] = m_op(my_value, left_value); - } - } - - KOKKOS_FUNCTION - StdAdjacentDiffFunctor(InputIteratorType first_from, - OutputIteratorType first_dest, BinaryOperator op) - : m_first_from(std::move(first_from)), - m_first_dest(std::move(first_dest)), - m_op(std::move(op)) {} -}; - -// ------------------------------------------ -// adjacent_difference_impl -// ------------------------------------------ -template -OutputIteratorType adjacent_difference_impl(const std::string& label, - const ExecutionSpace& ex, - InputIteratorType first_from, - InputIteratorType last_from, - OutputIteratorType first_dest, - BinaryOp bin_op) { - // checks - Impl::static_assert_random_access_and_accessible(ex, first_from, first_dest); - Impl::static_assert_iterators_have_matching_difference_type(first_from, - first_dest); - Impl::expect_valid_range(first_from, last_from); - - if (first_from == last_from) { - return first_dest; - } - - // aliases - using value_type = typename OutputIteratorType::value_type; - using aux_view_type = ::Kokkos::View; - using functor_t = - StdAdjacentDiffFunctor; - - // run - const auto num_elements = - Kokkos::Experimental::distance(first_from, last_from); - aux_view_type aux_view("aux_view", num_elements); - ::Kokkos::parallel_for(label, - RangePolicy(ex, 0, num_elements), - functor_t(first_from, first_dest, bin_op)); - ex.fence("Kokkos::adjacent_difference: fence after operation"); - - // return - return first_dest + num_elements; -} - -} // end namespace Impl - -// ------------------------ -// -// public API -// -// ------------------------ template std::enable_if_t::value, diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp new file mode 100644 index 0000000000..332f9dd369 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AdjacentFind.hpp @@ -0,0 +1,124 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_HPP +#define KOKKOS_STD_ALGORITHMS_ADJACENT_FIND_HPP + +#include "impl/Kokkos_AdjacentFind.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// overload set1 +template +IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", + ex, first, last); +} + +template +IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::adjacent_find_impl(label, ex, first, last); +} + +template +auto adjacent_find(const ExecutionSpace& ex, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, + KE::begin(v), KE::end(v)); +} + +template +auto adjacent_find(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v)); +} + +// overload set2 +template +IteratorType adjacent_find(const ExecutionSpace& ex, IteratorType first, + IteratorType last, BinaryPredicateType pred) { + return Impl::adjacent_find_impl("Kokkos::adjacent_find_iterator_api_default", + ex, first, last, pred); +} + +template +IteratorType adjacent_find(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + BinaryPredicateType pred) { + return Impl::adjacent_find_impl(label, ex, first, last, pred); +} + +template +auto adjacent_find(const ExecutionSpace& ex, + const ::Kokkos::View& v, + BinaryPredicateType pred) { + 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_impl("Kokkos::adjacent_find_view_api_default", ex, + KE::begin(v), KE::end(v), pred); +} + +template +auto adjacent_find(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v, + BinaryPredicateType pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + namespace KE = ::Kokkos::Experimental; + return Impl::adjacent_find_impl(label, ex, KE::begin(v), KE::end(v), pred); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp new file mode 100644 index 0000000000..66a49541f3 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AllOf.hpp @@ -0,0 +1,94 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_ALL_OF_HPP +#define KOKKOS_STD_ALGORITHMS_ALL_OF_HPP + +#include "impl/Kokkos_AllOfAnyOfNoneOf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +bool all_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, + Predicate predicate) { + return Impl::all_of_impl("Kokkos::all_of_iterator_api_default", ex, first, + last, predicate); +} + +template +bool all_of(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, Predicate predicate) { + return Impl::all_of_impl(label, ex, first, last, predicate); +} + +template +bool all_of(const ExecutionSpace& ex, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::all_of_impl("Kokkos::all_of_view_api_default", ex, KE::cbegin(v), + KE::cend(v), std::move(predicate)); +} + +template +bool all_of(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v, 
+ Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::all_of_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp new file mode 100644 index 0000000000..e50e90f6da --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_AnyOf.hpp @@ -0,0 +1,94 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_ANY_OF_HPP +#define KOKKOS_STD_ALGORITHMS_ANY_OF_HPP + +#include "impl/Kokkos_AllOfAnyOfNoneOf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +bool any_of(const ExecutionSpace& ex, InputIterator first, InputIterator last, + Predicate predicate) { + return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, first, last, + predicate); +} + +template +bool any_of(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, Predicate predicate) { + return Impl::any_of_impl(label, ex, first, last, predicate); +} + +template +bool any_of(const ExecutionSpace& ex, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::any_of_impl("Kokkos::any_of_view_api_default", ex, KE::cbegin(v), + KE::cend(v), std::move(predicate)); +} + +template +bool any_of(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::any_of_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +} 
// namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp index beb53fdd70..5449196192 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_BeginEnd.hpp @@ -46,8 +46,8 @@ #define KOKKOS_BEGIN_END_HPP #include -#include "Kokkos_RandomAccessIterator.hpp" -#include "Kokkos_Constraints.hpp" +#include "impl/Kokkos_RandomAccessIterator.hpp" +#include "impl/Kokkos_Constraints.hpp" /// \file Kokkos_BeginEnd.hpp /// \brief Kokkos begin, end, cbegin, cend diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp new file mode 100644 index 0000000000..b3237041b7 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Copy.hpp @@ -0,0 +1,97 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COPY_HPP +#define KOKKOS_STD_ALGORITHMS_COPY_HPP + +#include "impl/Kokkos_CopyCopyN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +OutputIterator copy(const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first) { + return Impl::copy_impl("Kokkos::copy_iterator_api_default", ex, first, last, + d_first); +} + +template +OutputIterator copy(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first) { + return Impl::copy_impl(label, ex, first, last, d_first); +} + +template +auto copy(const ExecutionSpace& ex, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_impl("Kokkos::copy_view_api_default", ex, + KE::cbegin(source), KE::cend(source), KE::begin(dest)); +} + +template +auto 
copy(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_impl(label, ex, KE::cbegin(source), KE::cend(source), + KE::begin(dest)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp new file mode 100644 index 0000000000..83efd96672 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyBackward.hpp @@ -0,0 +1,95 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_HPP +#define KOKKOS_STD_ALGORITHMS_COPY_BACKWARD_HPP + +#include "impl/Kokkos_CopyBackward.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +IteratorType2 copy_backward(const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 d_last) { + return Impl::copy_backward_impl("Kokkos::copy_backward_iterator_api_default", + ex, first, last, d_last); +} + +template +IteratorType2 copy_backward(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 d_last) { + return Impl::copy_backward_impl(label, ex, first, last, d_last); +} + +template +auto copy_backward(const ExecutionSpace& ex, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_backward_impl("Kokkos::copy_backward_view_api_default", ex, + cbegin(source), cend(source), 
end(dest)); +} + +template +auto copy_backward(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& source, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_backward_impl(label, ex, cbegin(source), cend(source), + end(dest)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp new file mode 100644 index 0000000000..c83cc29886 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyIf.hpp @@ -0,0 +1,99 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COPY_IF_HPP +#define KOKKOS_STD_ALGORITHMS_COPY_IF_HPP + +#include "impl/Kokkos_CopyIf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +OutputIterator copy_if(const ExecutionSpace& ex, InputIterator first, + InputIterator last, OutputIterator d_first, + Predicate pred) { + return Impl::copy_if_impl("Kokkos::copy_if_iterator_api_default", ex, first, + last, d_first, std::move(pred)); +} + +template +OutputIterator copy_if(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, + OutputIterator d_first, Predicate pred) { + return Impl::copy_if_impl(label, ex, first, last, d_first, std::move(pred)); +} + +template +auto copy_if(const ExecutionSpace& ex, + const ::Kokkos::View& source, + ::Kokkos::View& dest, Predicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_if_impl("Kokkos::copy_if_view_api_default", ex, + 
cbegin(source), cend(source), begin(dest), + std::move(pred)); +} + +template +auto copy_if(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& source, + ::Kokkos::View& dest, Predicate pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + return Impl::copy_if_impl(label, ex, cbegin(source), cend(source), + begin(dest), std::move(pred)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp new file mode 100644 index 0000000000..7f3b9374c7 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CopyN.hpp @@ -0,0 +1,98 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COPY_N_HPP +#define KOKKOS_STD_ALGORITHMS_COPY_N_HPP + +#include "impl/Kokkos_CopyCopyN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +OutputIterator copy_n(const ExecutionSpace& ex, InputIterator first, Size count, + OutputIterator result) { + return Impl::copy_n_impl("Kokkos::copy_n_iterator_api_default", ex, first, + count, result); +} + +template +OutputIterator copy_n(const std::string& label, const ExecutionSpace& ex, + InputIterator first, Size count, OutputIterator result) { + return Impl::copy_n_impl(label, ex, first, count, result); +} + +template +auto copy_n(const ExecutionSpace& ex, + const ::Kokkos::View& source, Size count, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_n_impl("Kokkos::copy_n_view_api_default", ex, + KE::cbegin(source), count, KE::begin(dest)); +} + +template +auto 
copy_n(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& source, Size count, + ::Kokkos::View& dest) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(source); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(dest); + + namespace KE = ::Kokkos::Experimental; + return Impl::copy_n_impl(label, ex, KE::cbegin(source), count, + KE::begin(dest)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp new file mode 100644 index 0000000000..a885ee4ad2 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Count.hpp @@ -0,0 +1,94 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COUNT_HPP +#define KOKKOS_STD_ALGORITHMS_COUNT_HPP + +#include "impl/Kokkos_CountCountIf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +typename IteratorType::difference_type count(const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + const T& value) { + return Impl::count_impl("Kokkos::count_iterator_api_default", ex, first, last, + value); +} + +template +typename IteratorType::difference_type count(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + const T& value) { + return Impl::count_impl(label, ex, first, last, value); +} + +template +auto count(const ExecutionSpace& ex, + const ::Kokkos::View& v, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_impl("Kokkos::count_view_api_default", ex, KE::cbegin(v), + KE::cend(v), value); +} + +template +auto count(const std::string& label, const ExecutionSpace& ex, + const 
::Kokkos::View& v, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_impl(label, ex, KE::cbegin(v), KE::cend(v), value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp new file mode 100644 index 0000000000..98b9d74c4c --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_CountIf.hpp @@ -0,0 +1,99 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_COUNT_IF_HPP +#define KOKKOS_STD_ALGORITHMS_COUNT_IF_HPP + +#include "impl/Kokkos_CountCountIf.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +typename IteratorType::difference_type count_if(const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + Predicate predicate) { + return Impl::count_if_impl("Kokkos::count_if_iterator_api_default", ex, first, + last, std::move(predicate)); +} + +template +typename IteratorType::difference_type count_if(const std::string& label, + const ExecutionSpace& ex, + IteratorType first, + IteratorType last, + Predicate predicate) { + return Impl::count_if_impl(label, ex, first, last, std::move(predicate)); +} + +template +auto count_if(const ExecutionSpace& ex, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = ::Kokkos::Experimental; + return Impl::count_if_impl("Kokkos::count_if_view_api_default", ex, + KE::cbegin(v), KE::cend(v), std::move(predicate)); +} + +template +auto count_if(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v, + Predicate predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + namespace KE = 
::Kokkos::Experimental; + return Impl::count_if_impl(label, ex, KE::cbegin(v), KE::cend(v), + std::move(predicate)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp index ced4370472..4e148642b1 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Distance.hpp @@ -45,8 +45,8 @@ #ifndef KOKKOS_STD_ALGORITHMS_DISTANCE_HPP #define KOKKOS_STD_ALGORITHMS_DISTANCE_HPP -#include "Kokkos_Constraints.hpp" -#include "Kokkos_RandomAccessIterator.hpp" +#include "impl/Kokkos_Constraints.hpp" +#include "impl/Kokkos_RandomAccessIterator.hpp" namespace Kokkos { namespace Experimental { diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp new file mode 100644 index 0000000000..8634019fad --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Equal.hpp @@ -0,0 +1,198 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_EQUAL_HPP +#define KOKKOS_STD_ALGORITHMS_EQUAL_HPP + +#include "impl/Kokkos_Equal.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2) { + return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, + last1, first2); +} + +template +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2) { + return Impl::equal_impl(label, ex, first1, last1, first2); +} + +template +std::enable_if_t< 
::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, BinaryPredicateType predicate) { + return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, + last1, first2, std::move(predicate)); +} + +template +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, + BinaryPredicateType predicate) { + return Impl::equal_impl(label, ex, first1, last1, first2, + std::move(predicate)); +} + +template +bool equal(const ExecutionSpace& ex, + const ::Kokkos::View& view1, + ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_impl("Kokkos::equal_view_api_default", ex, + KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2)); +} + +template +bool equal(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view1, + ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2)); +} + +template +bool equal(const ExecutionSpace& ex, + const ::Kokkos::View& view1, + ::Kokkos::View& view2, + BinaryPredicateType predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_impl("Kokkos::equal_view_api_default", ex, + KE::cbegin(view1), KE::cend(view1), KE::cbegin(view2), + 
std::move(predicate)); +} + +template +bool equal(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view1, + ::Kokkos::View& view2, + BinaryPredicateType predicate) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::equal_impl(label, ex, KE::cbegin(view1), KE::cend(view1), + KE::cbegin(view2), std::move(predicate)); +} + +template +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, + last1, first2, last2); +} + +template +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2) { + return Impl::equal_impl(label, ex, first1, last1, first2, last2); +} + +template +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const ExecutionSpace& ex, IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + return Impl::equal_impl("Kokkos::equal_iterator_api_default", ex, first1, + last1, first2, last2, std::move(predicate)); +} + +template +std::enable_if_t< ::Kokkos::Experimental::Impl::are_iterators< + IteratorType1, IteratorType2>::value, + bool> +equal(const std::string& label, const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, IteratorType2 last2, + BinaryPredicateType predicate) { + return Impl::equal_impl(label, ex, first1, last1, first2, last2, + std::move(predicate)); +} + 
+} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp new file mode 100644 index 0000000000..b97710f24f --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ExclusiveScan.hpp @@ -0,0 +1,190 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_HPP +#define KOKKOS_STD_ALGORITHMS_EXCLUSIVE_SCAN_HPP + +#include "impl/Kokkos_ExclusiveScan.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// overload set 1 +template +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + return Impl::exclusive_scan_default_op_impl( + "Kokkos::exclusive_scan_default_functors_iterator_api", ex, first, last, + first_dest, init_value); +} + +template +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +exclusive_scan(const std::string& label, const ExecutionSpace& ex, + InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, ValueType init_value) { + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + return Impl::exclusive_scan_default_op_impl(label, ex, first, last, + first_dest, init_value); +} 
+ +template +auto exclusive_scan(const ExecutionSpace& ex, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_default_op_impl( + "Kokkos::exclusive_scan_default_functors_view_api", ex, + KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), + init_value); +} + +template +auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from), + KE::cend(view_from), + KE::begin(view_dest), init_value); +} + +// overload set 2 +template +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + OutputIteratorType> +exclusive_scan(const ExecutionSpace& ex, InputIteratorType first, + InputIteratorType last, OutputIteratorType first_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(ex); + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + return Impl::exclusive_scan_custom_op_impl( + "Kokkos::exclusive_scan_custom_functors_iterator_api", ex, first, last, + first_dest, init_value, bop); +} + +template +std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators< + InputIteratorType, OutputIteratorType>::value, + 
OutputIteratorType> +exclusive_scan(const std::string& label, const ExecutionSpace& ex, + InputIteratorType first, InputIteratorType last, + OutputIteratorType first_dest, ValueType init_value, + BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(ex); + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + return Impl::exclusive_scan_custom_op_impl(label, ex, first, last, first_dest, + init_value, bop); +} + +template +auto exclusive_scan(const ExecutionSpace& ex, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(ex); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_custom_op_impl( + "Kokkos::exclusive_scan_custom_functors_view_api", ex, + KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest), + init_value, bop); +} + +template +auto exclusive_scan(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view_from, + const ::Kokkos::View& view_dest, + ValueType init_value, BinaryOpType bop) { + Impl::static_assert_is_not_openmptarget(ex); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest); + static_assert(std::is_move_constructible::value, + "ValueType must be move constructible."); + namespace KE = ::Kokkos::Experimental; + return Impl::exclusive_scan_custom_op_impl( + label, ex, KE::cbegin(view_from), KE::cend(view_from), + KE::begin(view_dest), init_value, bop); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp 
b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp new file mode 100644 index 0000000000..200e03b9dc --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Fill.hpp @@ -0,0 +1,86 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FILL_HPP +#define KOKKOS_STD_ALGORITHMS_FILL_HPP + +#include "impl/Kokkos_FillFillN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +void fill(const ExecutionSpace& ex, IteratorType first, IteratorType last, + const T& value) { + Impl::fill_impl("Kokkos::fill_iterator_api_default", ex, first, last, value); +} + +template +void fill(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, const T& value) { + Impl::fill_impl(label, ex, first, last, value); +} + +template +void fill(const ExecutionSpace& ex, + const ::Kokkos::View& view, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + Impl::fill_impl("Kokkos::fill_view_api_default", ex, begin(view), end(view), + value); +} + +template +void fill(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + Impl::fill_impl(label, ex, begin(view), end(view), value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp new file mode 
100644 index 0000000000..2e814dc55f --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FillN.hpp @@ -0,0 +1,91 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FILL_N_HPP +#define KOKKOS_STD_ALGORITHMS_FILL_N_HPP + +#include "impl/Kokkos_FillFillN.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +IteratorType fill_n(const ExecutionSpace& ex, IteratorType first, SizeType n, + const T& value) { + return Impl::fill_n_impl("Kokkos::fill_n_iterator_api_default", ex, first, n, + value); +} + +template +IteratorType fill_n(const std::string& label, const ExecutionSpace& ex, + IteratorType first, SizeType n, const T& value) { + return Impl::fill_n_impl(label, ex, first, n, value); +} + +template +auto fill_n(const ExecutionSpace& ex, + const ::Kokkos::View& view, SizeType n, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::fill_n_impl("Kokkos::fill_n_view_api_default", ex, begin(view), + n, value); +} + +template +auto fill_n(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view, SizeType n, + const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + return Impl::fill_n_impl(label, ex, begin(view), n, value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp new file mode 100644 index 0000000000..6758f00ce4 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Find.hpp @@ -0,0 +1,89 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FIND_HPP +#define KOKKOS_STD_ALGORITHMS_FIND_HPP + +#include "impl/Kokkos_FindIfOrNot.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +InputIterator find(const ExecutionSpace& ex, InputIterator first, + InputIterator last, const T& value) { + return Impl::find_impl("Kokkos::find_iterator_api_default", ex, first, last, + value); +} + +template +InputIterator find(const std::string& label, const ExecutionSpace& ex, + InputIterator first, InputIterator last, const T& value) { + return Impl::find_impl(label, ex, first, last, value); +} + +template +auto find(const ExecutionSpace& ex, + const ::Kokkos::View& view, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_impl("Kokkos::find_view_api_default", ex, KE::begin(view), + KE::end(view), value); +} + +template +auto find(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view, const T& value) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_impl(label, ex, KE::begin(view), KE::end(view), value); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp new file mode 100644 index 0000000000..61b54c8225 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindEnd.hpp @@ -0,0 +1,149 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_FIND_END_HPP +#define KOKKOS_STD_ALGORITHMS_FIND_END_HPP + +#include "impl/Kokkos_FindEnd.hpp" +#include "Kokkos_Equal.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +// overload set 1: no binary predicate passed +template +IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 s_last) { + return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, + last, s_first, s_last); +} + +template +IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last) { + return Impl::find_end_impl(label, ex, first, last, s_first, s_last); +} + +template +auto find_end(const ExecutionSpace& ex, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, + KE::begin(view), KE::end(view), KE::begin(s_view), + KE::end(s_view)); +} + +template +auto find_end(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view)); +} + +// overload set 2: binary predicate passed +template +IteratorType1 find_end(const ExecutionSpace& ex, IteratorType1 first, + IteratorType1 last, IteratorType2 s_first, + IteratorType2 
s_last, const BinaryPredicateType& pred) { + return Impl::find_end_impl("Kokkos::find_end_iterator_api_default", ex, first, + last, s_first, s_last, pred); +} + +template +IteratorType1 find_end(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first, IteratorType1 last, + IteratorType2 s_first, IteratorType2 s_last, + const BinaryPredicateType& pred) { + return Impl::find_end_impl(label, ex, first, last, s_first, s_last, pred); +} + +template +auto find_end(const ExecutionSpace& ex, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_impl("Kokkos::find_end_view_api_default", ex, + KE::begin(view), KE::end(view), KE::begin(s_view), + KE::end(s_view), pred); +} + +template +auto find_end(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view, + const ::Kokkos::View& s_view, + const BinaryPredicateType& pred) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view); + + namespace KE = ::Kokkos::Experimental; + return Impl::find_end_impl(label, ex, KE::begin(view), KE::end(view), + KE::begin(s_view), KE::end(s_view), pred); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp new file mode 100644 index 0000000000..b8c27cb272 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindFirstOf.hpp @@ -0,0 +1,150 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). 
+// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_HPP
+#define KOKKOS_STD_ALGORITHMS_FIND_FIRST_OF_HPP
+
+#include "impl/Kokkos_FindFirstOf.hpp"
+#include "Kokkos_BeginEnd.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+
+// Public find_first_of entry points. Every overload below only forwards to
+// Impl::find_first_of_impl and returns whatever that call returns. The
+// overloads differ in two ways:
+//   * label: either passed by the caller or a fixed default string;
+//   * ranges: either two iterator pairs, or two Kokkos views that are first
+//     checked with static_assert_is_admissible_to_kokkos_std_algorithms and
+//     then turned into ranges with KE::begin / KE::end.
+
+// overload set 1: no binary predicate passed
+
+// iterators, default label
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first,
+                            IteratorType1 last, IteratorType2 s_first,
+                            IteratorType2 s_last) {
+  return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default",
+                                  ex, first, last, s_first, s_last);
+}
+
+// iterators, caller-supplied label
+template <class ExecutionSpace, class IteratorType1, class IteratorType2>
+IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex,
+                            IteratorType1 first, IteratorType1 last,
+                            IteratorType2 s_first, IteratorType2 s_last) {
+  return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last);
+}
+
+// views, default label
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto find_first_of(const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType1, Properties1...>& view,
+                   const ::Kokkos::View<DataType2, Properties2...>& s_view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex,
+                                  KE::begin(view), KE::end(view),
+                                  KE::begin(s_view), KE::end(s_view));
+}
+
+// views, caller-supplied label
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto find_first_of(const std::string& label, const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType1, Properties1...>& view,
+                   const ::Kokkos::View<DataType2, Properties2...>& s_view) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view),
+                                  KE::begin(s_view), KE::end(s_view));
+}
+
+// overload set 2: binary predicate passed
+// Same four shapes as set 1; `pred` is forwarded unchanged as the last
+// argument of Impl::find_first_of_impl.
+
+// iterators, default label
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+IteratorType1 find_first_of(const ExecutionSpace& ex, IteratorType1 first,
+                            IteratorType1 last, IteratorType2 s_first,
+                            IteratorType2 s_last,
+                            const BinaryPredicateType& pred) {
+  return Impl::find_first_of_impl("Kokkos::find_first_of_iterator_api_default",
+                                  ex, first, last, s_first, s_last, pred);
+}
+
+// iterators, caller-supplied label
+template <class ExecutionSpace, class IteratorType1, class IteratorType2,
+          class BinaryPredicateType>
+IteratorType1 find_first_of(const std::string& label, const ExecutionSpace& ex,
+                            IteratorType1 first, IteratorType1 last,
+                            IteratorType2 s_first, IteratorType2 s_last,
+                            const BinaryPredicateType& pred) {
+  return Impl::find_first_of_impl(label, ex, first, last, s_first, s_last,
+                                  pred);
+}
+
+// views, default label
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicateType>
+auto find_first_of(const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType1, Properties1...>& view,
+                   const ::Kokkos::View<DataType2, Properties2...>& s_view,
+                   const BinaryPredicateType& pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_first_of_impl("Kokkos::find_first_of_view_api_default", ex,
+                                  KE::begin(view), KE::end(view),
+                                  KE::begin(s_view), KE::end(s_view), pred);
+}
+
+// views, caller-supplied label
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryPredicateType>
+auto find_first_of(const std::string& label, const ExecutionSpace& ex,
+                   const ::Kokkos::View<DataType1, Properties1...>& view,
+                   const ::Kokkos::View<DataType2, Properties2...>& s_view,
+                   const BinaryPredicateType& pred) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(s_view);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_first_of_impl(label, ex, KE::begin(view), KE::end(view),
+                                  KE::begin(s_view), KE::end(s_view), pred);
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp
new file mode 100644
index 0000000000..54896da117
--- /dev/null
+++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIf.hpp
@@ -0,0 +1,95 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v.
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_FIND_IF_HPP
+#define KOKKOS_STD_ALGORITHMS_FIND_IF_HPP
+
+#include "impl/Kokkos_FindIfOrNot.hpp"
+#include "Kokkos_BeginEnd.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+
+// Public find_if entry points. Each overload forwards to
+// Impl::find_if_or_not_impl<true> and returns its result; the predicate is
+// taken by value and moved into the call. Overloads differ in whether the
+// label is supplied by the caller or defaulted, and in whether the range is
+// an iterator pair or a Kokkos view (checked with
+// static_assert_is_admissible_to_kokkos_std_algorithms, then converted with
+// KE::begin / KE::end).
+
+// iterators, default label
+template <class ExecutionSpace, class IteratorType, class PredicateType>
+IteratorType find_if(const ExecutionSpace& ex, IteratorType first,
+                     IteratorType last, PredicateType predicate) {
+  return Impl::find_if_or_not_impl<true>("Kokkos::find_if_iterator_api_default",
+                                         ex, first, last, std::move(predicate));
+}
+
+// iterators, caller-supplied label
+template <class ExecutionSpace, class IteratorType, class PredicateType>
+IteratorType find_if(const std::string& label, const ExecutionSpace& ex,
+                     IteratorType first, IteratorType last,
+                     PredicateType predicate) {
+  return Impl::find_if_or_not_impl<true>(label, ex, first, last,
+                                         std::move(predicate));
+}
+
+// view, default label
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+auto find_if(const ExecutionSpace& ex,
+             const ::Kokkos::View<DataType, Properties...>& v,
+             Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_if_or_not_impl<true>("Kokkos::find_if_view_api_default", ex,
+                                         KE::begin(v), KE::end(v),
+                                         std::move(predicate));
+}
+
+// view, caller-supplied label
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+auto find_if(const std::string& label, const ExecutionSpace& ex,
+             const ::Kokkos::View<DataType, Properties...>& v,
+             Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_if_or_not_impl<true>(label, ex, KE::begin(v), KE::end(v),
+                                         std::move(predicate));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp
new file mode 100644
index 0000000000..cfe6bb84d8
--- /dev/null
+++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_FindIfNot.hpp
@@ -0,0 +1,98 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v.
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_FIND_IF_NOT_HPP
+#define KOKKOS_STD_ALGORITHMS_FIND_IF_NOT_HPP
+
+#include "impl/Kokkos_FindIfOrNot.hpp"
+#include "Kokkos_BeginEnd.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+
+// Public find_if_not entry points. Each overload forwards to
+// Impl::find_if_or_not_impl<false> and returns its result; the predicate is
+// taken by value and moved into the call. Overloads differ in whether the
+// label is supplied by the caller or defaulted, and in whether the range is
+// an iterator pair or a Kokkos view (checked with
+// static_assert_is_admissible_to_kokkos_std_algorithms, then converted with
+// KE::begin / KE::end).
+
+// iterators, default label
+template <class ExecutionSpace, class IteratorType, class Predicate>
+IteratorType find_if_not(const ExecutionSpace& ex, IteratorType first,
+                         IteratorType last, Predicate predicate) {
+  return Impl::find_if_or_not_impl<false>(
+      "Kokkos::find_if_not_iterator_api_default", ex, first, last,
+      std::move(predicate));
+}
+
+// iterators, caller-supplied label
+template <class ExecutionSpace, class IteratorType, class Predicate>
+IteratorType find_if_not(const std::string& label, const ExecutionSpace& ex,
+                         IteratorType first, IteratorType last,
+                         Predicate predicate) {
+  return Impl::find_if_or_not_impl<false>(label, ex, first, last,
+                                          std::move(predicate));
+}
+
+// view, default label
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+auto find_if_not(const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& v,
+                 Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_if_or_not_impl<false>(
+      "Kokkos::find_if_not_view_api_default", ex, KE::begin(v), KE::end(v),
+      std::move(predicate));
+}
+
+// view, caller-supplied label
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Predicate>
+auto find_if_not(const std::string& label, const ExecutionSpace& ex,
+                 const ::Kokkos::View<DataType, Properties...>& v,
+                 Predicate predicate) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::find_if_or_not_impl<false>(label, ex, KE::begin(v), KE::end(v),
+                                          std::move(predicate));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp
new file mode 100644
index 0000000000..8a2f90e82b
--- /dev/null
+++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEach.hpp
@@ -0,0 +1,95 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v.
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_FOR_EACH_HPP
+#define KOKKOS_STD_ALGORITHMS_FOR_EACH_HPP
+
+#include "impl/Kokkos_ForEachForEachN.hpp"
+#include "Kokkos_BeginEnd.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+
+// Public for_each entry points. Each overload forwards to Impl::for_each_impl
+// and returns its result (declared as UnaryFunctorType). The functor is taken
+// by value and moved into the call. Overloads differ in whether the label is
+// supplied by the caller or defaulted, and in whether the range is an
+// iterator pair or a Kokkos view (checked with
+// static_assert_is_admissible_to_kokkos_std_algorithms, then converted with
+// KE::begin / KE::end).
+
+// iterators, caller-supplied label
+template <class ExecutionSpace, class IteratorType, class UnaryFunctorType>
+UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex,
+                          IteratorType first, IteratorType last,
+                          UnaryFunctorType functor) {
+  return Impl::for_each_impl(label, ex, first, last, std::move(functor));
+}
+
+// iterators, default label
+template <class ExecutionSpace, class IteratorType, class UnaryFunctorType>
+UnaryFunctorType for_each(const ExecutionSpace& ex, IteratorType first,
+                          IteratorType last, UnaryFunctorType functor) {
+  return Impl::for_each_impl("Kokkos::for_each_iterator_api_default", ex, first,
+                             last, std::move(functor));
+}
+
+// view, caller-supplied label
+template <class ExecutionSpace, class DataType, class... Properties,
+          class UnaryFunctorType>
+UnaryFunctorType for_each(const std::string& label, const ExecutionSpace& ex,
+                          const ::Kokkos::View<DataType, Properties...>& v,
+                          UnaryFunctorType functor) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::for_each_impl(label, ex, KE::begin(v), KE::end(v),
+                             std::move(functor));
+}
+
+// view, default label
+template <class ExecutionSpace, class DataType, class... Properties,
+          class UnaryFunctorType>
+UnaryFunctorType for_each(const ExecutionSpace& ex,
+                          const ::Kokkos::View<DataType, Properties...>& v,
+                          UnaryFunctorType functor) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::for_each_impl("Kokkos::for_each_view_api_default", ex,
+                             KE::begin(v), KE::end(v), std::move(functor));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp
new file mode 100644
index 0000000000..dd917a33e8
--- /dev/null
+++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ForEachN.hpp
@@ -0,0 +1,96 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v.
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_FOR_EACH_N_HPP
+#define KOKKOS_STD_ALGORITHMS_FOR_EACH_N_HPP
+
+#include "impl/Kokkos_ForEachForEachN.hpp"
+#include "Kokkos_BeginEnd.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+
+// Public for_each_n entry points. Each overload forwards to
+// Impl::for_each_n_impl with a start iterator and the count `n`, and returns
+// its result. The functor is taken by value and moved into the call. The view
+// overloads check the view with
+// static_assert_is_admissible_to_kokkos_std_algorithms and pass KE::begin(v)
+// as the start iterator.
+
+// iterator, caller-supplied label
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class UnaryFunctorType>
+IteratorType for_each_n(const std::string& label, const ExecutionSpace& ex,
+                        IteratorType first, SizeType n,
+                        UnaryFunctorType functor) {
+  return Impl::for_each_n_impl(label, ex, first, n, std::move(functor));
+}
+
+// iterator, default label
+template <class ExecutionSpace, class IteratorType, class SizeType,
+          class UnaryFunctorType>
+IteratorType for_each_n(const ExecutionSpace& ex, IteratorType first,
+                        SizeType n, UnaryFunctorType functor) {
+  return Impl::for_each_n_impl("Kokkos::for_each_n_iterator_api_default", ex,
+                               first, n, std::move(functor));
+}
+
+// view, caller-supplied label
+template <class ExecutionSpace, class DataType, class... Properties,
+          class SizeType, class UnaryFunctorType>
+auto for_each_n(const std::string& label, const ExecutionSpace& ex,
+                const ::Kokkos::View<DataType, Properties...>& v, SizeType n,
+                UnaryFunctorType functor) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::for_each_n_impl(label, ex, KE::begin(v), n, std::move(functor));
+}
+
+// view, default label
+template <class ExecutionSpace, class DataType, class... Properties,
+          class SizeType, class UnaryFunctorType>
+auto for_each_n(const ExecutionSpace& ex,
+                const ::Kokkos::View<DataType, Properties...>& v, SizeType n,
+                UnaryFunctorType functor) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::for_each_n_impl("Kokkos::for_each_n_view_api_default", ex,
+                               KE::begin(v), n, std::move(functor));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp
new file mode 100644
index 0000000000..955cb42d4b
--- /dev/null
+++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_Generate.hpp
@@ -0,0 +1,91 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v.
3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_GENERATE_HPP
+#define KOKKOS_STD_ALGORITHMS_GENERATE_HPP
+
+#include "impl/Kokkos_GenerateGenerateN.hpp"
+#include "Kokkos_BeginEnd.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+
+// Public generate entry points. Each overload forwards to Impl::generate_impl
+// and returns nothing. The generator is taken by value and moved into the
+// call. The view overloads check the view with
+// static_assert_is_admissible_to_kokkos_std_algorithms and pass
+// begin(view) / end(view) as the range.
+
+// iterators, default label
+template <class ExecutionSpace, class IteratorType, class Generator>
+void generate(const ExecutionSpace& ex, IteratorType first, IteratorType last,
+              Generator g) {
+  Impl::generate_impl("Kokkos::generate_iterator_api_default", ex, first, last,
+                      std::move(g));
+}
+
+// iterators, caller-supplied label
+template <class ExecutionSpace, class IteratorType, class Generator>
+void generate(const std::string& label, const ExecutionSpace& ex,
+              IteratorType first, IteratorType last, Generator g) {
+  Impl::generate_impl(label, ex, first, last, std::move(g));
+}
+
+// view, default label
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Generator>
+void generate(const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType, Properties...>& view,
+              Generator g) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  Impl::generate_impl("Kokkos::generate_view_api_default", ex, begin(view),
+                      end(view), std::move(g));
+}
+
+// view, caller-supplied label
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Generator>
+void generate(const std::string& label, const ExecutionSpace& ex,
+              const ::Kokkos::View<DataType, Properties...>& view,
+              Generator g) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  Impl::generate_impl(label, ex, begin(view), end(view), std::move(g));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp
new file mode 100644
index 0000000000..470edb1596
--- /dev/null
+++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_GenerateN.hpp
@@ -0,0 +1,93 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 3.0
+// Copyright (2020) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_GENERATE_N_HPP
+#define KOKKOS_STD_ALGORITHMS_GENERATE_N_HPP
+
+#include "impl/Kokkos_GenerateGenerateN.hpp"
+#include "Kokkos_BeginEnd.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+
+// Public generate_n entry points. Each overload calls Impl::generate_n_impl
+// with a start iterator and `count`; the generator is taken by value and
+// moved into the call. Note the two return conventions visible below: the
+// iterator overloads discard the impl result and return `first + count`,
+// while the view overloads return the impl result directly.
+
+// iterator, default label
+template <class ExecutionSpace, class IteratorType, class Size,
+          class Generator>
+IteratorType generate_n(const ExecutionSpace& ex, IteratorType first,
+                        Size count, Generator g) {
+  Impl::generate_n_impl("Kokkos::generate_n_iterator_api_default", ex, first,
+                        count, std::move(g));
+  return first + count;
+}
+
+// iterator, caller-supplied label
+template <class ExecutionSpace, class IteratorType, class Size,
+          class Generator>
+IteratorType generate_n(const std::string& label, const ExecutionSpace& ex,
+                        IteratorType first, Size count, Generator g) {
+  Impl::generate_n_impl(label, ex, first, count, std::move(g));
+  return first + count;
+}
+
+// view, default label
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Size, class Generator>
+auto generate_n(const ExecutionSpace& ex,
+                const ::Kokkos::View<DataType, Properties...>& view, Size count,
+                Generator g) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::generate_n_impl("Kokkos::generate_n_view_api_default", ex,
+                               begin(view), count, std::move(g));
+}
+
+// view, caller-supplied label
+template <class ExecutionSpace, class DataType, class... Properties,
+          class Size, class Generator>
+auto generate_n(const std::string& label, const ExecutionSpace& ex,
+                const ::Kokkos::View<DataType, Properties...>& view, Size count,
+                Generator g) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view);
+
+  return Impl::generate_n_impl(label, ex, begin(view), count, std::move(g));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp
new file mode 100644
index 0000000000..c34b5f43c9
--- /dev/null
+++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_InclusiveScan.hpp
@@ -0,0 +1,223 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 3.0
+// Copyright (2020) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_HPP
+#define KOKKOS_STD_ALGORITHMS_INCLUSIVE_SCAN_HPP
+
+#include "impl/Kokkos_InclusiveScan.hpp"
+#include "Kokkos_BeginEnd.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+
+// Public inclusive_scan entry points. Three overload sets, each with four
+// shapes (iterators or views, default or caller-supplied label):
+//   set 1 forwards to Impl::inclusive_scan_default_op_impl;
+//   set 2 forwards `binary_op` to Impl::inclusive_scan_custom_binary_op_impl;
+//   set 3 additionally forwards `init_value` to that same impl.
+// The iterator overloads are enabled only when
+// Impl::are_iterators<InputIteratorType, OutputIteratorType>::value is true.
+// The view overloads check both views with
+// static_assert_is_admissible_to_kokkos_std_algorithms, read the source via
+// KE::cbegin / KE::cend and write starting at KE::begin(view_dest).
+
+// overload set 1
+
+// iterators, default label
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType>
+std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
+                     InputIteratorType, OutputIteratorType>::value,
+                 OutputIteratorType>
+inclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
+               InputIteratorType last, OutputIteratorType first_dest) {
+  return Impl::inclusive_scan_default_op_impl(
+      "Kokkos::inclusive_scan_default_functors_iterator_api", ex, first, last,
+      first_dest);
+}
+
+// iterators, caller-supplied label
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType>
+std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
+                     InputIteratorType, OutputIteratorType>::value,
+                 OutputIteratorType>
+inclusive_scan(const std::string& label, const ExecutionSpace& ex,
+               InputIteratorType first, InputIteratorType last,
+               OutputIteratorType first_dest) {
+  return Impl::inclusive_scan_default_op_impl(label, ex, first, last,
+                                              first_dest);
+}
+
+// views, default label
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto inclusive_scan(
+    const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::inclusive_scan_default_op_impl(
+      "Kokkos::inclusive_scan_default_functors_view_api", ex,
+      KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest));
+}
+
+// views, caller-supplied label
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2>
+auto inclusive_scan(
+    const std::string& label, const ExecutionSpace& ex,
+    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+    const ::Kokkos::View<DataType2, Properties2...>& view_dest) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::inclusive_scan_default_op_impl(label, ex, KE::cbegin(view_from),
+                                              KE::cend(view_from),
+                                              KE::begin(view_dest));
+}
+
+// overload set 2 (accepting custom binary op)
+
+// iterators, default label
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOp>
+std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
+                     InputIteratorType, OutputIteratorType>::value,
+                 OutputIteratorType>
+inclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
+               InputIteratorType last, OutputIteratorType first_dest,
+               BinaryOp binary_op) {
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last,
+      first_dest, binary_op);
+}
+
+// iterators, caller-supplied label
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOp>
+std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
+                     InputIteratorType, OutputIteratorType>::value,
+                 OutputIteratorType>
+inclusive_scan(const std::string& label, const ExecutionSpace& ex,
+               InputIteratorType first, InputIteratorType last,
+               OutputIteratorType first_dest, BinaryOp binary_op) {
+  return Impl::inclusive_scan_custom_binary_op_impl(label, ex, first, last,
+                                                    first_dest, binary_op);
+}
+
+// views, default label
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOp>
+auto inclusive_scan(const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    BinaryOp binary_op) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      "Kokkos::inclusive_scan_custom_functors_view_api", ex,
+      KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest),
+      binary_op);
+}
+
+// views, caller-supplied label
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOp>
+auto inclusive_scan(const std::string& label, const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    BinaryOp binary_op) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      label, ex, KE::cbegin(view_from), KE::cend(view_from),
+      KE::begin(view_dest), binary_op);
+}
+
+// overload set 3
+
+// iterators, default label
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOp, class ValueType>
+std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
+                     InputIteratorType, OutputIteratorType>::value,
+                 OutputIteratorType>
+inclusive_scan(const ExecutionSpace& ex, InputIteratorType first,
+               InputIteratorType last, OutputIteratorType first_dest,
+               BinaryOp binary_op, ValueType init_value) {
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      "Kokkos::inclusive_scan_custom_functors_iterator_api", ex, first, last,
+      first_dest, binary_op, init_value);
+}
+
+// iterators, caller-supplied label
+template <class ExecutionSpace, class InputIteratorType,
+          class OutputIteratorType, class BinaryOp, class ValueType>
+std::enable_if_t<::Kokkos::Experimental::Impl::are_iterators<
+                     InputIteratorType, OutputIteratorType>::value,
+                 OutputIteratorType>
+inclusive_scan(const std::string& label, const ExecutionSpace& ex,
+               InputIteratorType first, InputIteratorType last,
+               OutputIteratorType first_dest, BinaryOp binary_op,
+               ValueType init_value) {
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      label, ex, first, last, first_dest, binary_op, init_value);
+}
+
+// views, default label
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOp,
+          class ValueType>
+auto inclusive_scan(const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    BinaryOp binary_op, ValueType init_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      "Kokkos::inclusive_scan_custom_functors_view_api", ex,
+      KE::cbegin(view_from), KE::cend(view_from), KE::begin(view_dest),
+      binary_op, init_value);
+}
+
+// views, caller-supplied label
+template <class ExecutionSpace, class DataType1, class... Properties1,
+          class DataType2, class... Properties2, class BinaryOp,
+          class ValueType>
+auto inclusive_scan(const std::string& label, const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType1, Properties1...>& view_from,
+                    const ::Kokkos::View<DataType2, Properties2...>& view_dest,
+                    BinaryOp binary_op, ValueType init_value) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_from);
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view_dest);
+  namespace KE = ::Kokkos::Experimental;
+  return Impl::inclusive_scan_custom_binary_op_impl(
+      label, ex, KE::cbegin(view_from), KE::cend(view_from),
+      KE::begin(view_dest), binary_op, init_value);
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp
new file mode 100644
index 0000000000..8a2ca207ae
--- /dev/null
+++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsPartitioned.hpp
@@ -0,0 +1,92 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 3.0
+// Copyright (2020) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL NTESS OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_HPP
+#define KOKKOS_STD_ALGORITHMS_IS_PARTITIONED_HPP
+
+#include "impl/Kokkos_IsPartitioned.hpp"
+#include "Kokkos_BeginEnd.hpp"
+
+namespace Kokkos {
+namespace Experimental {
+
+// Public is_partitioned entry points. Each overload forwards to
+// Impl::is_partitioned_impl and returns its bool result. The predicate is
+// taken by value and moved into the call. The view overloads check the view
+// with static_assert_is_admissible_to_kokkos_std_algorithms and pass
+// cbegin(v) / cend(v) as the range.
+
+// iterators, default label
+template <class ExecutionSpace, class IteratorType, class PredicateType>
+bool is_partitioned(const ExecutionSpace& ex, IteratorType first,
+                    IteratorType last, PredicateType p) {
+  return Impl::is_partitioned_impl(
+      "Kokkos::is_partitioned_iterator_api_default", ex, first, last,
+      std::move(p));
+}
+
+// iterators, caller-supplied label
+template <class ExecutionSpace, class IteratorType, class PredicateType>
+bool is_partitioned(const std::string& label, const ExecutionSpace& ex,
+                    IteratorType first, IteratorType last, PredicateType p) {
+  return Impl::is_partitioned_impl(label, ex, first, last, std::move(p));
+}
+
+// view, default label
+template <class ExecutionSpace, class PredicateType, class DataType,
+          class... Properties>
+bool is_partitioned(const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType, Properties...>& v,
+                    PredicateType p) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  return Impl::is_partitioned_impl("Kokkos::is_partitioned_view_api_default",
+                                   ex, cbegin(v), cend(v), std::move(p));
+}
+
+// view, caller-supplied label
+template <class ExecutionSpace, class PredicateType, class DataType,
+          class... Properties>
+bool is_partitioned(const std::string& label, const ExecutionSpace& ex,
+                    const ::Kokkos::View<DataType, Properties...>& v,
+                    PredicateType p) {
+  Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v);
+
+  return Impl::is_partitioned_impl(label, ex, cbegin(v), cend(v), std::move(p));
+}
+
+}  // namespace Experimental
+}  // namespace Kokkos
+
+#endif
diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp
new file mode 100644
index 0000000000..0ab466f338
--- /dev/null
+++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSorted.hpp
@@ -0,0 +1,131 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+// Kokkos v. 3.0
+// Copyright (2020) National Technology & Engineering
+// Solutions of Sandia, LLC (NTESS).
+//
+// Under the terms of Contract DE-NA0003525 with NTESS,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_HPP +#define KOKKOS_STD_ALGORITHMS_IS_SORTED_HPP + +#include "impl/Kokkos_IsSorted.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +bool is_sorted(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, + first, last); +} + +template +bool is_sorted(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::is_sorted_impl(label, ex, first, last); +} + +template +bool is_sorted(const ExecutionSpace& ex, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, + KE::cbegin(view), KE::cend(view)); +} + +template +bool is_sorted(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view)); +} + +template +bool is_sorted(const ExecutionSpace& ex, IteratorType first, IteratorType last, + ComparatorType 
comp) { + Impl::static_assert_is_not_openmptarget(ex); + return Impl::is_sorted_impl("Kokkos::is_sorted_iterator_api_default", ex, + first, last, std::move(comp)); +} + +template +bool is_sorted(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + return Impl::is_sorted_impl(label, ex, first, last, std::move(comp)); +} + +template +bool is_sorted(const ExecutionSpace& ex, + const ::Kokkos::View& view, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(ex); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_impl("Kokkos::is_sorted_view_api_default", ex, + KE::cbegin(view), KE::cend(view), + std::move(comp)); +} + +template +bool is_sorted(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(ex); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_impl(label, ex, KE::cbegin(view), KE::cend(view), + std::move(comp)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp new file mode 100644 index 0000000000..c480d9ee5a --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IsSortedUntil.hpp @@ -0,0 +1,134 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_HPP +#define KOKKOS_STD_ALGORITHMS_IS_SORTED_UNTIL_HPP + +#include "impl/Kokkos_IsSortedUntil.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::is_sorted_until_impl( + "Kokkos::is_sorted_until_iterator_api_default", ex, first, last); +} + +template +IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::is_sorted_until_impl(label, ex, first, last); +} + +template +auto is_sorted_until(const ExecutionSpace& ex, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", + ex, KE::begin(view), KE::end(view)); +} + +template +auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view)); +} + +template +IteratorType is_sorted_until(const ExecutionSpace& ex, IteratorType first, + IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + return Impl::is_sorted_until_impl( + "Kokkos::is_sorted_until_iterator_api_default", ex, first, last, + std::move(comp)); +} + +template +IteratorType is_sorted_until(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::is_sorted_until_impl(label, ex, first, last, 
std::move(comp)); +} + +template +auto is_sorted_until(const ExecutionSpace& ex, + const ::Kokkos::View& view, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(ex); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_impl("Kokkos::is_sorted_until_view_api_default", + ex, KE::begin(view), KE::end(view), + std::move(comp)); +} + +template +auto is_sorted_until(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view); + Impl::static_assert_is_not_openmptarget(ex); + + namespace KE = ::Kokkos::Experimental; + return Impl::is_sorted_until_impl(label, ex, KE::begin(view), KE::end(view), + std::move(comp)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingOperations.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp similarity index 79% rename from lib/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingOperations.hpp rename to lib/kokkos/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp index f8ca3456e5..1174740a5b 100644 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_ModifyingOperations.hpp +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_IterSwap.hpp @@ -42,38 +42,17 @@ //@HEADER */ -#ifndef KOKKOS_MODIFYING_OPERATIONS_HPP -#define KOKKOS_MODIFYING_OPERATIONS_HPP +#ifndef KOKKOS_STD_ALGORITHMS_ITER_SWAP_HPP +#define KOKKOS_STD_ALGORITHMS_ITER_SWAP_HPP #include -#include "Kokkos_BeginEnd.hpp" -#include "Kokkos_Constraints.hpp" +#include "impl/Kokkos_Constraints.hpp" +#include "Kokkos_Swap.hpp" namespace Kokkos { namespace Experimental { - -// move -template -KOKKOS_INLINE_FUNCTION std::remove_reference_t&& move(T&& t) { - return static_cast&&>(t); -} - -// swap -template -KOKKOS_INLINE_FUNCTION void swap(T& a, T& b) noexcept { - 
static_assert( - std::is_move_assignable::value && std::is_move_constructible::value, - "Kokkos::Experimental::swap arguments must be move assignable " - "and move constructible"); - - T tmp = std::move(a); - a = std::move(b); - b = std::move(tmp); -} - -//---------------------------------------------------------------------------- -// this is here because we use the swap function above namespace Impl { + template struct StdIterSwapFunctor { IteratorType1 m_a; diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp new file mode 100644 index 0000000000..4704a9ec56 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_LexicographicalCompare.hpp @@ -0,0 +1,154 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_HPP +#define KOKKOS_STD_ALGORITHMS_LEXICOGRAPHICAL_COMPARE_HPP + +#include "impl/Kokkos_LexicographicalCompare.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2) { + return Impl::lexicographical_compare_impl( + "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, + first2, last2); +} + +template +bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2) { + return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2, + last2); +} + +template +bool lexicographical_compare( + const ExecutionSpace& ex, + const ::Kokkos::View& view1, + ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + 
Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_impl( + "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2), KE::cend(view2)); +} + +template +bool lexicographical_compare( + const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& view1, + ::Kokkos::View& view2) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2), + KE::cend(view2)); +} + +template +bool lexicographical_compare(const ExecutionSpace& ex, IteratorType1 first1, + IteratorType1 last1, IteratorType2 first2, + IteratorType2 last2, ComparatorType comp) { + return Impl::lexicographical_compare_impl( + "Kokkos::lexicographical_compare_iterator_api_default", ex, first1, last1, + first2, last2, comp); +} + +template +bool lexicographical_compare(const std::string& label, const ExecutionSpace& ex, + IteratorType1 first1, IteratorType1 last1, + IteratorType2 first2, IteratorType2 last2, + ComparatorType comp) { + return Impl::lexicographical_compare_impl(label, ex, first1, last1, first2, + last2, comp); +} + +template +bool lexicographical_compare( + const ExecutionSpace& ex, + const ::Kokkos::View& view1, + ::Kokkos::View& view2, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_impl( + "Kokkos::lexicographical_compare_view_api_default", ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2), KE::cend(view2), comp); +} + +template +bool lexicographical_compare( + const std::string& label, const 
ExecutionSpace& ex, + const ::Kokkos::View& view1, + ::Kokkos::View& view2, ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view1); + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(view2); + + namespace KE = ::Kokkos::Experimental; + return Impl::lexicographical_compare_impl(label, ex, KE::cbegin(view1), + KE::cend(view1), KE::cbegin(view2), + KE::cend(view2), comp); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp new file mode 100644 index 0000000000..5f6e5cbf62 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MaxElement.hpp @@ -0,0 +1,132 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MAX_ELEMENT_HPP +#define KOKKOS_STD_ALGORITHMS_MAX_ELEMENT_HPP + +#include "impl/Kokkos_MinMaxMinmaxElement.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +auto max_element(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::min_or_max_element_impl( + "Kokkos::max_element_iterator_api_default", ex, first, last); +} + +template +auto max_element(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::min_or_max_element_impl(label, ex, first, last); +} + +template +auto max_element(const ExecutionSpace& ex, IteratorType first, + IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl( + "Kokkos::max_element_iterator_api_default", ex, first, last, + std::move(comp)); +} + +template +auto max_element(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, ComparatorType comp) { + 
Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl( + label, ex, first, last, std::move(comp)); +} + +template +auto max_element(const ExecutionSpace& ex, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_impl( + "Kokkos::max_element_view_api_default", ex, begin(v), end(v)); +} + +template +auto max_element(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_impl(label, ex, begin(v), + end(v)); +} + +template +auto max_element(const ExecutionSpace& ex, + const ::Kokkos::View& v, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl( + "Kokkos::max_element_view_api_default", ex, begin(v), end(v), + std::move(comp)); +} + +template +auto max_element(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl( + label, ex, begin(v), end(v), std::move(comp)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp new file mode 100644 index 0000000000..63cc548960 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinElement.hpp @@ -0,0 +1,132 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MIN_ELEMENT_HPP +#define KOKKOS_STD_ALGORITHMS_MIN_ELEMENT_HPP + +#include "impl/Kokkos_MinMaxMinmaxElement.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +auto min_element(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::min_or_max_element_impl( + "Kokkos::min_element_iterator_api_default", ex, first, last); +} + +template +auto min_element(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::min_or_max_element_impl(label, ex, first, last); +} + +template +auto min_element(const ExecutionSpace& ex, IteratorType first, + IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl( + "Kokkos::min_element_iterator_api_default", ex, first, last, + std::move(comp)); +} + +template +auto min_element(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl( + label, ex, first, last, std::move(comp)); +} + +template +auto min_element(const ExecutionSpace& ex, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_impl( + "Kokkos::min_element_view_api_default", ex, begin(v), end(v)); +} + +template +auto min_element(const ExecutionSpace& ex, + const ::Kokkos::View& v, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl( + "Kokkos::min_element_view_api_default", ex, begin(v), end(v), + std::move(comp)); +} + +template +auto min_element(const std::string& label, const 
ExecutionSpace& ex, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::min_or_max_element_impl(label, ex, begin(v), + end(v)); +} + +template +auto min_element(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::min_or_max_element_impl( + label, ex, begin(v), end(v), std::move(comp)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp new file mode 100644 index 0000000000..07cdefcc05 --- /dev/null +++ b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElement.hpp @@ -0,0 +1,133 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_STD_ALGORITHMS_MINMAX_ELEMENT_HPP +#define KOKKOS_STD_ALGORITHMS_MINMAX_ELEMENT_HPP + +#include "impl/Kokkos_MinMaxMinmaxElement.hpp" +#include "Kokkos_BeginEnd.hpp" + +namespace Kokkos { +namespace Experimental { + +template +auto minmax_element(const ExecutionSpace& ex, IteratorType first, + IteratorType last) { + return Impl::minmax_element_impl( + "Kokkos::minmax_element_iterator_api_default", ex, first, last); +} + +template +auto minmax_element(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last) { + return Impl::minmax_element_impl(label, ex, first, last); +} + +template +auto minmax_element(const ExecutionSpace& ex, IteratorType first, + IteratorType last, ComparatorType comp) { + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::minmax_element_impl( + "Kokkos::minmax_element_iterator_api_default", ex, first, last, + std::move(comp)); +} + +template +auto minmax_element(const std::string& label, const ExecutionSpace& ex, + IteratorType first, IteratorType last, + ComparatorType comp) 
{ + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::minmax_element_impl( + label, ex, first, last, std::move(comp)); +} + +template +auto minmax_element(const ExecutionSpace& ex, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::minmax_element_impl( + "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v)); +} + +template +auto minmax_element(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + + return Impl::minmax_element_impl(label, ex, begin(v), + end(v)); +} + +template +auto minmax_element(const ExecutionSpace& ex, + const ::Kokkos::View& v, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::minmax_element_impl( + "Kokkos::minmax_element_view_api_default", ex, begin(v), end(v), + std::move(comp)); +} + +template +auto minmax_element(const std::string& label, const ExecutionSpace& ex, + const ::Kokkos::View& v, + ComparatorType comp) { + Impl::static_assert_is_admissible_to_kokkos_std_algorithms(v); + Impl::static_assert_is_not_openmptarget(ex); + + return Impl::minmax_element_impl( + label, ex, begin(v), end(v), std::move(comp)); +} + +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp b/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp deleted file mode 100644 index aa8f5ba376..0000000000 --- a/lib/kokkos/algorithms/src/std_algorithms/Kokkos_MinMaxElementOperations.hpp +++ /dev/null @@ -1,409 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). 
-// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_STD_MIN_MAX_ELEMENT_OPERATIONS_HPP -#define KOKKOS_STD_MIN_MAX_ELEMENT_OPERATIONS_HPP - -#include -#include "Kokkos_BeginEnd.hpp" -#include "Kokkos_Constraints.hpp" -#include "Kokkos_Distance.hpp" -#include "Kokkos_ModifyingOperations.hpp" - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -template -struct StdMinOrMaxElemFunctor { - using index_type = typename IteratorType::difference_type; - using red_value_type = typename ReducerType::value_type; - - IteratorType m_first; - ReducerType m_reducer; - - KOKKOS_FUNCTION - void operator()(const index_type i, red_value_type& red_value) const { - m_reducer.join(red_value, red_value_type{m_first[i], i}); - } - - KOKKOS_FUNCTION - StdMinOrMaxElemFunctor(IteratorType first, ReducerType reducer) - : m_first(std::move(first)), m_reducer(std::move(reducer)) {} -}; - -template -struct StdMinMaxElemFunctor { - using index_type = typename IteratorType::difference_type; - using red_value_type = typename ReducerType::value_type; - IteratorType m_first; - ReducerType m_reducer; - - KOKKOS_FUNCTION - void operator()(const index_type i, red_value_type& red_value) const { - const auto& my_value = m_first[i]; - m_reducer.join(red_value, red_value_type{my_value, my_value, i, i}); - } - - KOKKOS_FUNCTION - StdMinMaxElemFunctor(IteratorType first, ReducerType reducer) - : m_first(std::move(first)), m_reducer(std::move(reducer)) {} -}; - -// ------------------------------------------ -// min_or_max_element_impl -// ------------------------------------------ -template